helper_fns.c 23.3 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2
3
4
5
6
7
8
9
10
/*
 *
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"
#include "datatype.h"

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===

categories:
    - name        : FAULT_TOLERANCE
      description : cvars that control fault tolerance behavior

cvars:
    - name        : MPIR_CVAR_ENABLE_COLL_FT_RET
      category    : FAULT_TOLERANCE
      type        : boolean
      default     : true
      class       : device
      verbosity   : MPI_T_VERBOSITY_USER_BASIC
      scope       : MPI_T_SCOPE_ALL_EQ
      description : >-
        DEPRECATED! Will be removed in MPICH-3.2
        Collectives called on a communicator with a failed process
        should not hang, however the result of the operation may be
        invalid even though the function returns MPI_SUCCESS.  This
        option enables an experimental feature that will return an error
        if the result of the collective is invalid.

=== END_MPI_T_CVAR_INFO_BLOCK ===
*/

37
38
#define COPY_BUFFER_SZ 16384

39
40
41
42
43
/* These functions are used in the implementation of collective
   operations. They are wrappers around MPID send/recv functions. They do
   sends/receives by setting the context offset to
   MPID_CONTEXT_INTRA_COLL or MPID_CONTEXT_INTER_COLL. */

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#undef FUNCNAME
#define FUNCNAME MPIC_Probe
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
/* Probe for a message on the collective context of `comm`.
 *
 * The context offset is MPID_CONTEXT_INTRA_COLL for an intracommunicator and
 * MPID_CONTEXT_INTER_COLL otherwise.  The result of MPID_Probe is returned
 * unchanged. */
int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)
{
    MPID_Comm *comm_ptr;
    int context_id;
    int mpi_errno;

    MPID_Comm_get_ptr(comm, comm_ptr);

    if (comm_ptr->comm_kind == MPID_INTRACOMM) {
        context_id = MPID_CONTEXT_INTRA_COLL;
    } else {
        context_id = MPID_CONTEXT_INTER_COLL;
    }

    /* Success and failure take the same exit, so just hand back the code. */
    mpi_errno = MPID_Probe(source, tag, comm_ptr, context_id, status);
    return mpi_errno;
}


69
70
71
72
#undef FUNCNAME
#define FUNCNAME MPIR_Localcopy
#undef FCNAME
#define FCNAME "MPIR_Localcopy"
73
int MPIR_Localcopy(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
74
75
                   void *recvbuf, int recvcount, MPI_Datatype recvtype)
{
76
77
78
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
79
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
80
    MPIU_CHKLMEM_DECL(1);
81
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);
82

83
84
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY);

85
86
    MPID_Datatype_get_size_macro(sendtype, sendsize);
    MPID_Datatype_get_size_macro(recvtype, recvsize);
87

88
89
    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;
90

91
    /* if there is no data to copy, bail out */
92
93
    if (!sdata_sz || !rdata_sz)
        goto fn_exit;
94
95
96

#if defined(HAVE_ERROR_CHECKING)
    if (sdata_sz > rdata_sz) {
97
98
99
100
        MPIU_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz);
        copy_sz = rdata_sz;
    }
    else
101
#endif /* HAVE_ERROR_CHECKING */
102
        copy_sz = sdata_sz;
103

104
105
106
    /* Builtin types is the common case; optimize for it */
    if ((HANDLE_GET_KIND(sendtype) == HANDLE_KIND_BUILTIN) &&
        HANDLE_GET_KIND(recvtype) == HANDLE_KIND_BUILTIN) {
107
108
        MPIU_Memcpy(recvbuf, sendbuf, copy_sz);
        goto fn_exit;
109
    }
110

111
112
113
    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

114
115
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);
116
117

    if (sendtype_iscontig && recvtype_iscontig)
118
119
120
121
122
123
124
125
126
    {
#if defined(HAVE_ERROR_CHECKING)
        MPIU_ERR_CHKMEMCPYANDJUMP(mpi_errno,
                                  ((char *)recvbuf + recvtype_true_lb),
                                  ((char *)sendbuf + sendtype_true_lb),
                                  copy_sz);
#endif
        MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb),
               ((char *) sendbuf + sendtype_true_lb),
127
               copy_sz);
128
    }
129
130
131
    else if (sendtype_iscontig)
    {
        MPID_Segment seg;
132
	MPI_Aint last;
133
134
135
136

	MPID_Segment_init(recvbuf, recvcount, recvtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_unpack(&seg, 0, &last, (char*)sendbuf + sendtype_true_lb);
137
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
138
139
140
141
    }
    else if (recvtype_iscontig)
    {
        MPID_Segment seg;
142
	MPI_Aint last;
143
144
145
146
147
148
149
150
151
152
153
154
155
156

	MPID_Segment_init(sendbuf, sendcount, sendtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_pack(&seg, 0, &last, (char*)recvbuf + recvtype_true_lb);
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else
    {
	char * buf;
	MPIDI_msg_sz_t buf_off;
	MPID_Segment sseg;
	MPIDI_msg_sz_t sfirst;
	MPID_Segment rseg;
	MPIDI_msg_sz_t rfirst;
157
158

        MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207

	MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0);
	MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0);

	sfirst = 0;
	rfirst = 0;
	buf_off = 0;
	
	while (1)
	{
	    MPI_Aint last;
	    char * buf_end;

	    if (copy_sz - sfirst > COPY_BUFFER_SZ - buf_off)
	    {
		last = sfirst + (COPY_BUFFER_SZ - buf_off);
	    }
	    else
	    {
		last = copy_sz;
	    }
	    
	    MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
	    MPIU_Assert(last > sfirst);
	    
	    buf_end = buf + buf_off + (last - sfirst);
	    sfirst = last;
	    
	    MPID_Segment_unpack(&rseg, rfirst, &last, buf);
	    MPIU_Assert(last > rfirst);

	    rfirst = last;

	    if (rfirst == copy_sz)
	    {
		/* successful completion */
		break;
	    }

            /* if the send side finished, but the recv side couldn't unpack it, there's a datatype mismatch */
            MPIU_ERR_CHKANDJUMP(sfirst == copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");        

            /* if not all data was unpacked, copy it to the front of the buffer for next time */
	    buf_off = sfirst - rfirst;
	    if (buf_off > 0)
	    {
		memmove(buf, buf_end - buf_off, buf_off);
	    }
	}
208
209
    }
    
210
    
211
  fn_exit:
212
    MPIU_CHKLMEM_FREEALL();
213
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY);
214
215
216
217
218
219
220
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}


221
222
223
224
/* FIXME: For the brief-global and finer-grain control, we must ensure that
   the global lock is *not* held when this routine is called. (unless we change
   progress_start/end to grab the lock, in which case we must *still* make
   sure that the lock is not held when this routine is called). */
225
226
227
228
229
230
#undef FUNCNAME
#define FUNCNAME MPIC_Wait
#undef FCNAME
#define FCNAME "MPIC_Wait"
/* Block until `request_ptr` is complete, driving the progress engine.
 *
 * Returns MPI_SUCCESS once the request is complete, or the error code
 * reported by MPID_Progress_wait.  The request is not released here. */
int MPIC_Wait(MPID_Request * request_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAIT);

    MPIDI_PT2PT_FUNC_ENTER(MPID_STATE_MPIC_WAIT);
    if (!MPID_Request_is_complete(request_ptr))
    {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
        while (!MPID_Request_is_complete(request_ptr))
        {
            mpi_errno = MPID_Progress_wait(&progress_state);
            if (mpi_errno) {
                /* --BEGIN ERROR HANDLING-- */
                /* Pair the earlier MPID_Progress_start before bailing out,
                 * as MPIC_Sendrecv_replace does on the same failure. */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_POP(mpi_errno);
                /* --END ERROR HANDLING-- */
            }
        }
        MPID_Progress_end(&progress_state);
    }

 fn_fail:
    /* Success and failure share this exit. */
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_WAIT);
    return mpi_errno;
}
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276


/* Fault-tolerance versions.  When a process fails, collectives will
   still complete, however the result may be invalid.  Processes
   directly communicating with the failed process can detect the
   failure, however another mechanism is needed to communicate the
   failure to other processes receiving the invalid data.  To do this
   we introduce the _ft versions of the MPIC_ helper functions.  These
   functions take a pointer to an error flag.  When this is set to
   TRUE, the send functions will communicate the failure to the
   receiver.  If a function detects a failure, either by getting a
   failure in the communication operation, or by receiving an error
   indicator from a remote process, it sets the error flag to TRUE.

   In this implementation, we indicate an error to a remote process by
   sending an empty message instead of the requested buffer.  When a
   process receives an empty message, it knows to set the error flag.
   We count on the fact that collectives that exchange data (as
   opposed to barrier) will never send an empty message.  The barrier
   collective will not communicate failure information this way, but
   this is OK since there is no data that can be received corrupted. */

#undef FUNCNAME
277
#define FUNCNAME MPIC_Send
278
279
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
280
int MPIC_Send(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
281
282
283
                 MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
284
285
286
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
287
288
289
290
291
292
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

293
294
295
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

296
    if (*errflag && MPIR_CVAR_ENABLE_COLL_FT_RET)
297
298
        MPIR_TAG_SET_ERROR_BIT(tag);

299
300
301
302
303
304
305
306
307
308
309
310
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Send(buf, count, datatype, dest, tag, comm_ptr,
                          context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_Request_release(request_ptr);
    }
311

312
 fn_exit:
313
314
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SEND_FT);
    return mpi_errno;
315
 fn_fail:
316
317
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
318
    goto fn_exit;
319
    /* --END ERROR HANDLING-- */
320
321
322
}

#undef FUNCNAME
323
#define FUNCNAME MPIC_Recv
324
325
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
326
int MPIC_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
327
328
329
                 MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
330
    int context_id;
331
    MPI_Status mystatus;
332
333
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
334
335
336
337
338
339
    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

340
341
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);
342

343
344
345
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
346

347
348
    if (status == MPI_STATUS_IGNORE)
        status = &mystatus;
349

350
351
    mpi_errno = MPID_Recv(buf, count, datatype, source, tag, comm_ptr,
                          context_id, status, &request_ptr);
352
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
353
354
355
356
357
358
359
360
361
362
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno == MPI_SUCCESS) {
            *status = request_ptr->status;
            mpi_errno = request_ptr->status.MPI_ERROR;
        } else {
            MPIU_ERR_POP(mpi_errno);
        }
        MPID_Request_release(request_ptr);
    }
363

364
    if (!MPIR_CVAR_ENABLE_COLL_FT_RET) goto fn_exit;
365
366

    if (source != MPI_PROC_NULL) {
367
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
368
            *errflag = TRUE;
369
370
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
371
372
373
374
375
376
377
378
379
            MPIU_Assert(status->MPI_TAG == tag);
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV_FT);
    return mpi_errno;
 fn_fail:
380
381
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
382
    goto fn_exit;
383
    /* --END ERROR HANDLING-- */
384
385
386
}

#undef FUNCNAME
387
#define FUNCNAME MPIC_Ssend
388
389
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
390
int MPIC_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
391
392
393
                  MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
394
395
396
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
397
398
399
400
401
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SSEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
402
403
404
405
406
407
408
409

    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
            "**countneg", "**countneg %d", count);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

410
    if (*errflag && MPIR_CVAR_ENABLE_COLL_FT_RET)
411
412
        MPIR_TAG_SET_ERROR_BIT(tag);

413
414
415
416
417
418
419
420
    mpi_errno = MPID_Ssend(buf, count, datatype, dest, tag, comm_ptr,
                           context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_Request_release(request_ptr);
    }
421

422
 fn_exit:
423
424
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SSEND_FT);
    return mpi_errno;
425
 fn_fail:
426
427
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
428
    goto fn_exit;
429
    /* --END ERROR HANDLING-- */
430
431
432
}

#undef FUNCNAME
433
#define FUNCNAME MPIC_Sendrecv
434
435
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
436
int MPIC_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
437
438
439
440
441
                     int dest, int sendtag, void *recvbuf, int recvcount,
                     MPI_Datatype recvtype, int source, int recvtag,
                     MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
442
    int context_id;
443
    MPI_Status mystatus;
444
445
    MPID_Request *recv_req_ptr = NULL, *send_req_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
446
447
448
449
450
451
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

452
453
454
455
456
457
458
459
460
    MPIU_ERR_CHKANDJUMP1((sendcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", sendcount);
    MPIU_ERR_CHKANDJUMP1((recvcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", recvcount);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

461
    if (MPIR_CVAR_ENABLE_COLL_FT_RET) {
462
463
        if (status == MPI_STATUS_IGNORE) status = &mystatus;
        if (*errflag) MPIR_TAG_SET_ERROR_BIT(sendtag);
464
    }
465

466
467
    mpi_errno = MPID_Irecv(recvbuf, recvcount, recvtype, source, recvtag,
                           comm_ptr, context_id, &recv_req_ptr);
468
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
    mpi_errno = MPID_Isend(sendbuf, sendcount, sendtype, dest, recvtag,
                           comm_ptr, context_id, &send_req_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIC_Wait(send_req_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPIC_Wait(recv_req_ptr);
    if (mpi_errno) MPIU_ERR_POPFATAL(mpi_errno);

    *status = recv_req_ptr->status;
    mpi_errno = recv_req_ptr->status.MPI_ERROR;

    MPID_Request_release(send_req_ptr);
    MPID_Request_release(recv_req_ptr);

484
    if (!MPIR_CVAR_ENABLE_COLL_FT_RET) goto fn_exit;
485
486

    if (source != MPI_PROC_NULL) {
487
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
488
            *errflag = TRUE;
489
490
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
491
492
493
494
495
496
497
498
499
500
501
502
503
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }
    
 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");

    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

504
505
506
/* NOTE: for regular collectives (as opposed to irregular collectives) calling
 * this function repeatedly will almost always be slower than performing the
 * equivalent inline because of the overhead of the repeated malloc/free */
507
#undef FUNCNAME
508
#define FUNCNAME MPIC_Sendrecv_replace
509
510
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
511
int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
512
513
514
515
516
517
                             int dest, int sendtag,
                             int source, int recvtag,
                             MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Status mystatus;
518
519
520
521
    MPIR_Context_id_t context_id_offset;
    MPID_Request *sreq;
    MPID_Request *rreq;
    void *tmpbuf = NULL;
522
523
    MPI_Aint tmpbuf_size = 0;
    MPI_Aint tmpbuf_count = 0;
524
525
    MPID_Comm *comm_ptr;
    MPIU_CHKLMEM_DECL(1);
526
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
527
528
529
530
#ifdef MPID_LOG_ARROWS
    /* The logging macros log sendcount and recvcount */
    int sendcount = count, recvcount = count;
#endif
531
532
533
534
535

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

536
537
538
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

539
    if (MPIR_CVAR_ENABLE_COLL_FT_RET) {
540
541
        if (status == MPI_STATUS_IGNORE) status = &mystatus;
        if (*errflag) MPIR_TAG_SET_ERROR_BIT(sendtag);
542
543
    }

544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id_offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    if (count > 0 && dest != MPI_PROC_NULL) {
        MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
        MPIU_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");

        mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = MPID_Irecv(buf, count, datatype, source, recvtag,
                           comm_ptr, context_id_offset, &rreq);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Isend(tmpbuf, tmpbuf_count, MPI_PACKED, dest,
                           sendtag, comm_ptr, context_id_offset, &sreq);
    if (mpi_errno != MPI_SUCCESS) {
        /* --BEGIN ERROR HANDLING-- */
        /* FIXME: should we cancel the pending (possibly completed) receive
         * request or wait for it to complete? */
        MPID_Request_release(rreq);
        MPIU_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */
    }

    if (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq)) {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
        while (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq)) {
            mpi_errno = MPID_Progress_wait(&progress_state);
            if (mpi_errno != MPI_SUCCESS) {
                /* --BEGIN ERROR HANDLING-- */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_POP(mpi_errno);
                /* --END ERROR HANDLING-- */
            }
        }
        MPID_Progress_end(&progress_state);
    }

    *status = rreq->status;

    if (mpi_errno == MPI_SUCCESS) {
        mpi_errno = rreq->status.MPI_ERROR;

        if (mpi_errno == MPI_SUCCESS) {
            mpi_errno = sreq->status.MPI_ERROR;
        }
    }

    MPID_Request_release(sreq);
    MPID_Request_release(rreq);

600
    if (!MPIR_CVAR_ENABLE_COLL_FT_RET) goto fn_exit;
601
602
603
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    
    if (source != MPI_PROC_NULL) {
604
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
605
            *errflag = TRUE;
606
607
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
608
609
610
611
612
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }

 fn_exit:
613
    MPIU_CHKLMEM_FREEALL();
614
615
616
617
618
619
620
621
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
622
#define FUNCNAME MPIC_Isend
623
624
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
625
int MPIC_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
626
627
628
                  MPI_Comm comm, MPI_Request *request, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
629
630
631
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
632
633
634
635
636
637
    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

638
639
640
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

641
    if (*errflag && MPIR_CVAR_ENABLE_COLL_FT_RET)
642
643
        MPIR_TAG_SET_ERROR_BIT(tag);

644
645
646
647
648
649
650
651
652
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Isend(buf, count, datatype, dest, tag, comm_ptr,
            context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    *request = request_ptr->handle;
653

654
 fn_exit:
655
656
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISEND_FT);
    return mpi_errno;
657
658
 fn_fail:
    goto fn_exit;
659
660
661
}

#undef FUNCNAME
662
#define FUNCNAME MPIC_Irecv
663
664
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
665
int MPIC_Irecv(void *buf, int count, MPI_Datatype datatype, int source,
666
667
668
                  int tag, MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
669
670
671
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
672
673
674
675
    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);

676
677
678
679
680
681
682
683
684
685
686
687
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Irecv(buf, count, datatype, source, tag, comm_ptr,
            context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    *request = request_ptr->handle;
688

689
 fn_exit:
690
691
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
    return mpi_errno;
692
693
 fn_fail:
    goto fn_exit;
694
695
696
697
}


#undef FUNCNAME
698
#define FUNCNAME MPIC_Waitall
699
700
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
701
int MPIC_Waitall(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag)
702
703
704
705
706
707
708
709
710
711
712
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAITALL_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_WAITALL_FT);

    MPIU_Assert(statuses != MPI_STATUSES_IGNORE);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

713
714
715
    /* The MPI_TAG field is not set for send operations, so if we want
       to check for the error bit in the tag below, we should initialize all
       tag fields here. */
716
717
718
    for (i = 0; i < numreq; ++i)
        statuses[i].MPI_TAG = 0;
    
719
720
721
    mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

722
    if (*errflag || !MPIR_CVAR_ENABLE_COLL_FT_RET)
723
724
725
        goto fn_exit;

    for (i = 0; i < numreq; ++i) {
726
        if (MPIR_TAG_CHECK_ERROR_BIT(statuses[i].MPI_TAG)) {
727
            *errflag = TRUE;
728
            MPIR_TAG_CLEAR_ERROR_BIT(statuses[i].MPI_TAG);
729
730
731
732
733
734
735
736
737
738
739
            break;
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_WAITALL_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}