mpid_rma.c 15.5 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 3 4 5 6 7
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidimpl.h"
8
#include "mpidrma.h"
9

Xin Zhao's avatar
Xin Zhao committed
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===

cvars:
    - name        : MPIR_CVAR_CH3_RMA_SLOTS_SIZE
      category    : CH3
      type        : int
      default     : 262144
      class       : none
      verbosity   : MPI_T_VERBOSITY_USER_BASIC
      scope       : MPI_T_SCOPE_ALL_EQ
      description : >-
        Number of RMA slots during window creation. Each slot contains
        a linked list of target elements. The distribution of ranks among
        slots follows a round-robin pattern. Requires a positive value.

26 27 28 29 30 31 32 33 34 35 36 37
    - name        : MPIR_CVAR_CH3_RMA_LOCK_DATA_BYTES
      category    : CH3
      type        : int
      default     : 655360
      class       : none
      verbosity   : MPI_T_VERBOSITY_USER_BASIC
      scope       : MPI_T_SCOPE_ALL_EQ
      description : >-
        Size (in bytes) of available lock data this window can provide. If
        the currently buffered lock data is more than this value, the process
        will drop the upcoming operation data. Requires a positive value.

Xin Zhao's avatar
Xin Zhao committed
38 39 40
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/

41 42 43

MPIU_THREADSAFE_INIT_DECL(initRMAoptions);

Xin Zhao's avatar
Xin Zhao committed
44 45
MPIDI_RMA_Win_list_t *MPIDI_RMA_Win_list = NULL, *MPIDI_RMA_Win_list_tail = NULL;

Xin Zhao's avatar
Xin Zhao committed
46
static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model, MPID_Info *info,
47
                    MPID_Comm * comm_ptr, MPID_Win ** win_ptr);
48 49


50 51 52
/*
 * MPID_WIN_FTABLE_SET_DEFAULTS - point every entry of the RMAFns table that is
 * embedded in a window at the corresponding MPIDI_* routine.
 *
 * win_ptr is a pointer to a window pointer (it is dereferenced once to reach
 * the window, then RMAFns is taken by address).
 */
#define MPID_WIN_FTABLE_SET_DEFAULTS(win_ptr)                           \
    do {                                                                \
        /* RMAFns lives inside MPID_Win; grab its address once */       \
        MPID_RMAFns *rma_fns_ = &(*(win_ptr))->RMAFns;                  \
                                                                        \
        /* window management */                                         \
        rma_fns_->Win_free            = MPIDI_Win_free;                 \
        rma_fns_->Win_attach          = MPIDI_Win_attach;               \
        rma_fns_->Win_detach          = MPIDI_Win_detach;               \
        rma_fns_->Win_shared_query    = MPIDI_Win_shared_query;         \
                                                                        \
        /* info handling */                                             \
        rma_fns_->Win_set_info        = MPIDI_Win_set_info;             \
        rma_fns_->Win_get_info        = MPIDI_Win_get_info;             \
                                                                        \
        /* communication operations */                                  \
        rma_fns_->Put                 = MPIDI_Put;                      \
        rma_fns_->Get                 = MPIDI_Get;                      \
        rma_fns_->Accumulate          = MPIDI_Accumulate;               \
        rma_fns_->Get_accumulate      = MPIDI_Get_accumulate;           \
        rma_fns_->Fetch_and_op        = MPIDI_Fetch_and_op;             \
        rma_fns_->Compare_and_swap    = MPIDI_Compare_and_swap;         \
                                                                        \
        /* request-based communication operations */                    \
        rma_fns_->Rput                = MPIDI_Rput;                     \
        rma_fns_->Rget                = MPIDI_Rget;                     \
        rma_fns_->Raccumulate         = MPIDI_Raccumulate;              \
        rma_fns_->Rget_accumulate     = MPIDI_Rget_accumulate;          \
                                                                        \
        /* fence and post/start/complete/wait synchronization */        \
        rma_fns_->Win_fence           = MPIDI_Win_fence;                \
        rma_fns_->Win_post            = MPIDI_Win_post;                 \
        rma_fns_->Win_start           = MPIDI_Win_start;                \
        rma_fns_->Win_complete        = MPIDI_Win_complete;             \
        rma_fns_->Win_wait            = MPIDI_Win_wait;                 \
        rma_fns_->Win_test            = MPIDI_Win_test;                 \
                                                                        \
        /* lock/unlock synchronization */                               \
        rma_fns_->Win_lock            = MPIDI_Win_lock;                 \
        rma_fns_->Win_unlock          = MPIDI_Win_unlock;               \
        rma_fns_->Win_lock_all        = MPIDI_Win_lock_all;             \
        rma_fns_->Win_unlock_all      = MPIDI_Win_unlock_all;           \
                                                                        \
        /* flush and sync */                                            \
        rma_fns_->Win_flush           = MPIDI_Win_flush;                \
        rma_fns_->Win_flush_all       = MPIDI_Win_flush_all;            \
        rma_fns_->Win_flush_local     = MPIDI_Win_flush_local;          \
        rma_fns_->Win_flush_local_all = MPIDI_Win_flush_local_all;      \
        rma_fns_->Win_sync            = MPIDI_Win_sync;                 \
    } while (0)


94
#undef FUNCNAME
95
#define FUNCNAME MPID_Win_create
96 97
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
98 99
int MPID_Win_create(void *base, MPI_Aint size, int disp_unit, MPID_Info * info,
                    MPID_Comm * comm_ptr, MPID_Win ** win_ptr)
100
{
101
    int mpi_errno = MPI_SUCCESS;
102

103
    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_CREATE);
104

105 106
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_CREATE);

Wesley Bland's avatar
Wesley Bland committed
107 108
    /* Check to make sure the communicator hasn't already been revoked */
    if (comm_ptr->revoked) {
109
        MPIU_ERR_SETANDJUMP(mpi_errno, MPIX_ERR_REVOKED, "**revoked");
Wesley Bland's avatar
Wesley Bland committed
110 111
    }

112
    mpi_errno =
Xin Zhao's avatar
Xin Zhao committed
113
        win_init(size, disp_unit, MPI_WIN_FLAVOR_CREATE, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
114 115
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
116

117 118
    (*win_ptr)->base = base;

119 120 121
    mpi_errno = MPIDI_CH3U_Win_fns.create(base, size, disp_unit, info, comm_ptr, win_ptr);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
122

123
  fn_fail:
124 125 126 127 128 129 130 131 132
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_CREATE);
    return mpi_errno;
}


#undef FUNCNAME
#define FUNCNAME MPID_Win_allocate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
133 134
int MPID_Win_allocate(MPI_Aint size, int disp_unit, MPID_Info * info,
                      MPID_Comm * comm_ptr, void *baseptr, MPID_Win ** win_ptr)
135
{
136
    int mpi_errno = MPI_SUCCESS;
137
    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_ALLOCATE);
138

139 140
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_ALLOCATE);

141
    mpi_errno =
Xin Zhao's avatar
Xin Zhao committed
142
        win_init(size, disp_unit, MPI_WIN_FLAVOR_ALLOCATE, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
143 144 145
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_POP(mpi_errno);
    }
146

147
    mpi_errno = MPIDI_CH3U_Win_fns.allocate(size, disp_unit, info, comm_ptr, baseptr, win_ptr);
148 149 150
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_POP(mpi_errno);
    }
151

152
  fn_fail:
153 154 155 156 157 158 159 160 161
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_ALLOCATE);
    return mpi_errno;
}


#undef FUNCNAME
#define FUNCNAME MPID_Win_create_dynamic
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
162
int MPID_Win_create_dynamic(MPID_Info * info, MPID_Comm * comm_ptr, MPID_Win ** win_ptr)
163
{
164
    int mpi_errno = MPI_SUCCESS;
165

166
    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
167

168
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
169

170 171
    mpi_errno = win_init(0 /* spec defines size to be 0 */ ,
                         1 /* spec defines disp_unit to be 1 */ ,
Xin Zhao's avatar
Xin Zhao committed
172
                         MPI_WIN_FLAVOR_DYNAMIC, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
173

174 175
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
176

177 178
    (*win_ptr)->base = MPI_BOTTOM;

179
    mpi_errno = MPIDI_CH3U_Win_fns.create_dynamic(info, comm_ptr, win_ptr);
180 181 182
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_POP(mpi_errno);
    }
183

184
  fn_fail:
185
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_CREATE_DYNAMIC);
186 187 188 189
    return mpi_errno;
}


190
/* The memory allocation functions */
191 192 193 194
#undef FUNCNAME
#define FUNCNAME MPID_Alloc_mem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
195
void *MPID_Alloc_mem(size_t size, MPID_Info * info_ptr)
196
{
197
    void *ap = NULL;
198 199 200 201
    MPIDI_STATE_DECL(MPID_STATE_MPID_ALLOC_MEM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_ALLOC_MEM);

202
    ap = MPIDI_Alloc_mem(size, info_ptr);
203

204 205 206 207 208 209 210 211 212
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_ALLOC_MEM);
    return ap;
}


#undef FUNCNAME
#define FUNCNAME MPID_Free_mem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
213
int MPID_Free_mem(void *ptr)
214 215 216 217 218 219
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_FREE_MEM);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_FREE_MEM);

220
    mpi_errno = MPIDI_Free_mem(ptr);
221
    if (mpi_errno != MPI_SUCCESS) {
222
        MPIU_ERR_POP(mpi_errno);
223
    }
224 225

  fn_fail:
226
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_FREE_MEM);
227 228 229
    return mpi_errno;
}

230

231 232 233 234
#undef FUNCNAME
#define FUNCNAME MPID_Win_allocate_shared
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
235 236
int MPID_Win_allocate_shared(MPI_Aint size, int disp_unit, MPID_Info * info, MPID_Comm * comm_ptr,
                             void *base_ptr, MPID_Win ** win_ptr)
237
{
238
    int mpi_errno = MPI_SUCCESS;
239 240

    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
241

242 243
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);

244
    mpi_errno =
Xin Zhao's avatar
Xin Zhao committed
245
        win_init(size, disp_unit, MPI_WIN_FLAVOR_SHARED, MPI_WIN_UNIFIED, info, comm_ptr, win_ptr);
246 247
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
248

249 250 251 252
    mpi_errno =
        MPIDI_CH3U_Win_fns.allocate_shared(size, disp_unit, info, comm_ptr, base_ptr, win_ptr);
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
253

254
  fn_fail:
255 256 257
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_ALLOCATE_SHARED);
    return mpi_errno;
}
258 259 260 261 262 263


#undef FUNCNAME
#define FUNCNAME win_init
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
Xin Zhao's avatar
Xin Zhao committed
264
static int win_init(MPI_Aint size, int disp_unit, int create_flavor, int model, MPID_Info *info,
265
                    MPID_Comm * comm_ptr, MPID_Win ** win_ptr)
266 267
{
    int mpi_errno = MPI_SUCCESS;
268
    int i;
269
    MPID_Comm *win_comm_ptr;
270
    int win_target_pool_size;
Xin Zhao's avatar
Xin Zhao committed
271
    MPIDI_RMA_Win_list_t *win_elem;
272
    MPIU_CHKPMEM_DECL(5);
273 274 275 276
    MPIDI_STATE_DECL(MPID_STATE_WIN_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_WIN_INIT);

277
    if (initRMAoptions) {
278
        MPIU_THREADSAFE_INIT_BLOCK_BEGIN(initRMAoptions);
279

Xin Zhao's avatar
Xin Zhao committed
280 281
        MPIDI_CH3_RMA_Init_sync_pvars();
        MPIDI_CH3_RMA_Init_pkthandler_pvars();
282 283 284 285 286

        MPIU_THREADSAFE_INIT_CLEAR(initRMAoptions);
        MPIU_THREADSAFE_INIT_BLOCK_END(initRMAoptions);
    }

287 288 289
    *win_ptr = (MPID_Win *) MPIU_Handle_obj_alloc(&MPID_Win_mem);
    MPIU_ERR_CHKANDJUMP1(!(*win_ptr), mpi_errno, MPI_ERR_OTHER, "**nomem",
                         "**nomem %s", "MPID_Win_mem");
290 291

    mpi_errno = MPIR_Comm_dup_impl(comm_ptr, &win_comm_ptr);
292 293
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);
294 295 296 297 298

    MPIU_Object_set_ref(*win_ptr, 1);

    /* (*win_ptr)->errhandler is set by upper level; */
    /* (*win_ptr)->base is set by caller; */
299 300 301 302 303 304
    (*win_ptr)->size = size;
    (*win_ptr)->disp_unit = disp_unit;
    (*win_ptr)->create_flavor = create_flavor;
    (*win_ptr)->model = model;
    (*win_ptr)->attributes = NULL;
    (*win_ptr)->comm_ptr = win_comm_ptr;
305

306
    (*win_ptr)->at_completion_counter = 0;
307 308 309 310
    /* (*win_ptr)->base_addrs[] is set by caller; */
    /* (*win_ptr)->sizes[] is set by caller; */
    /* (*win_ptr)->disp_units[] is set by caller; */
    /* (*win_ptr)->all_win_handles[] is set by caller; */
311
    (*win_ptr)->current_lock_type = MPID_LOCK_NONE;
312
    (*win_ptr)->shared_lock_ref_cnt = 0;
313
    (*win_ptr)->lock_queue = NULL;
314
    (*win_ptr)->lock_queue_tail = NULL;
315
    (*win_ptr)->shm_allocated = FALSE;
316 317
    (*win_ptr)->states.access_state = MPIDI_RMA_NONE;
    (*win_ptr)->states.exposure_state = MPIDI_RMA_NONE;
318
    (*win_ptr)->non_empty_slots = 0;
319
    (*win_ptr)->accumulated_ops_cnt = 0;
320
    (*win_ptr)->active_req_cnt = 0;
321 322 323 324 325 326 327
    (*win_ptr)->fence_sync_req = MPI_REQUEST_NULL;
    (*win_ptr)->start_req = NULL;
    (*win_ptr)->start_ranks_in_win_grp = NULL;
    (*win_ptr)->start_grp_size = 0;
    (*win_ptr)->lock_all_assert = 0;
    (*win_ptr)->lock_epoch_count = 0;
    (*win_ptr)->outstanding_locks = 0;
328
    (*win_ptr)->current_lock_data_bytes = 0;
329

330
    /* Initialize the info flags */
331
    (*win_ptr)->info_args.no_locks = 0;
332
    (*win_ptr)->info_args.accumulate_ordering = MPIDI_ACC_ORDER_RAR | MPIDI_ACC_ORDER_RAW |
333 334 335
        MPIDI_ACC_ORDER_WAR | MPIDI_ACC_ORDER_WAW;
    (*win_ptr)->info_args.accumulate_ops = MPIDI_ACC_OPS_SAME_OP_NO_OP;
    (*win_ptr)->info_args.same_size = 0;
336
    (*win_ptr)->info_args.alloc_shared_noncontig = 0;
337
    (*win_ptr)->info_args.alloc_shm = FALSE;
338

Xin Zhao's avatar
Xin Zhao committed
339 340 341 342 343 344 345
    /* Set function pointers on window */
    MPID_WIN_FTABLE_SET_DEFAULTS(win_ptr);

    /* Set info_args on window based on info provided by user */
    mpi_errno = (*win_ptr)->RMAFns.Win_set_info((*win_ptr), info);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

346
    MPIU_CHKPMEM_MALLOC((*win_ptr)->op_pool_start, MPIDI_RMA_Op_t *,
347 348 349 350 351 352 353 354 355
                        sizeof(MPIDI_RMA_Op_t) * MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE, mpi_errno,
                        "RMA op pool");
    (*win_ptr)->op_pool = NULL;
    (*win_ptr)->op_pool_tail = NULL;
    for (i = 0; i < MPIR_CVAR_CH3_RMA_OP_WIN_POOL_SIZE; i++) {
        (*win_ptr)->op_pool_start[i].pool_type = MPIDI_RMA_POOL_WIN;
        MPL_LL_APPEND((*win_ptr)->op_pool, (*win_ptr)->op_pool_tail, &((*win_ptr)->op_pool_start[i]));
    }

356
    win_target_pool_size = MPIR_MIN(MPIR_CVAR_CH3_RMA_TARGET_WIN_POOL_SIZE, MPIR_Comm_size(win_comm_ptr));
357
    MPIU_CHKPMEM_MALLOC((*win_ptr)->target_pool_start, MPIDI_RMA_Target_t *,
358 359 360 361 362 363 364 365 366
                        sizeof(MPIDI_RMA_Target_t) * win_target_pool_size,
                        mpi_errno, "RMA target pool");
    (*win_ptr)->target_pool = NULL;
    (*win_ptr)->target_pool_tail = NULL;
    for (i = 0; i < win_target_pool_size; i++) {
        (*win_ptr)->target_pool_start[i].pool_type = MPIDI_RMA_POOL_WIN;
        MPL_LL_APPEND((*win_ptr)->target_pool, (*win_ptr)->target_pool_tail, &((*win_ptr)->target_pool_start[i]));
    }

Xin Zhao's avatar
Xin Zhao committed
367
    (*win_ptr)->num_slots = MPIR_MIN(MPIR_CVAR_CH3_RMA_SLOTS_SIZE, MPIR_Comm_size(win_comm_ptr));
368
    MPIU_CHKPMEM_MALLOC((*win_ptr)->slots, MPIDI_RMA_Slot_t *,
Xin Zhao's avatar
Xin Zhao committed
369 370 371 372 373 374
                        sizeof(MPIDI_RMA_Slot_t) * (*win_ptr)->num_slots, mpi_errno, "RMA slots");
    for (i = 0; i < (*win_ptr)->num_slots; i++) {
        (*win_ptr)->slots[i].target_list = NULL;
        (*win_ptr)->slots[i].target_list_tail = NULL;
    }

375 376
    /* FIXME: we can optimize by letting the user to pass WIN INFO hint if they will not use passive target,
       in such case we do not need to allocate window pool for lock entries. */
377 378
    MPIU_CHKPMEM_MALLOC((*win_ptr)->lock_entry_pool_start, MPIDI_RMA_Lock_entry_t *,
                        sizeof(MPIDI_RMA_Lock_entry_t) * MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE,
379 380 381 382 383 384 385 386
                        mpi_errno, "RMA lock entry pool");
    (*win_ptr)->lock_entry_pool = NULL;
    (*win_ptr)->lock_entry_pool_tail = NULL;
    for (i = 0; i < MPIR_CVAR_CH3_RMA_LOCK_ENTRY_WIN_POOL_SIZE; i++) {
        MPL_LL_APPEND((*win_ptr)->lock_entry_pool, (*win_ptr)->lock_entry_pool_tail,
                      &((*win_ptr)->lock_entry_pool_start[i]));
    }

Xin Zhao's avatar
Xin Zhao committed
387 388 389 390 391 392
    /* enqueue window into the global list */
    MPIU_CHKPMEM_MALLOC(win_elem, MPIDI_RMA_Win_list_t *, sizeof(MPIDI_RMA_Win_list_t), mpi_errno,
                        "Window list element");
    win_elem->win_ptr = *win_ptr;
    MPL_LL_APPEND(MPIDI_RMA_Win_list, MPIDI_RMA_Win_list_tail, win_elem);

393
  fn_exit:
394 395
    MPIDI_FUNC_EXIT(MPID_STATE_WIN_INIT);
    return mpi_errno;
396
  fn_fail:
397
    MPIU_CHKPMEM_REAP();
398 399
    goto fn_exit;
}
400 401 402 403 404 405


#undef FUNCNAME
#define FUNCNAME MPID_Win_set_info
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
406
int MPID_Win_set_info(MPID_Win * win, MPID_Info * info)
407 408 409 410 411 412 413
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_SET_INFO);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_WIN_SET_INFO);

    mpi_errno = win->RMAFns.Win_set_info(win, info);
414 415 416
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_POP(mpi_errno);
    }
417

418
  fn_exit:
419 420
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_SET_INFO);
    return mpi_errno;
421
  fn_fail:
422 423 424 425 426 427 428 429
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPID_Win_get_info
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
430
int MPID_Win_get_info(MPID_Win * win, MPID_Info ** info_used)
431 432 433 434 435 436 437
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_WIN_GET_INFO);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_WIN_GET_INFO);

    mpi_errno = win->RMAFns.Win_get_info(win, info_used);
438 439 440
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_POP(mpi_errno);
    }
441

442
  fn_exit:
443 444
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_WIN_GET_INFO);
    return mpi_errno;
445
  fn_fail:
446 447
    goto fn_exit;
}