helper_fns.c 28.1 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2
3
4
5
6
7
8
9
10
/*
 *
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"
#include "datatype.h"

11
12
#define COPY_BUFFER_SZ 16384

13
14
15
16
17
/* These functions are used in the implementation of collective
   operations. They are wrappers around MPID send/recv functions. They do
   sends/receives by setting the context offset to
   MPID_CONTEXT_INTRA_COLL or MPID_CONTEXT_INTER_COLL. */

18
19
20
static int MPIC_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                     MPI_Comm comm, MPI_Request *request);

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#undef FUNCNAME
#define FUNCNAME MPIC_Probe
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    int context_id;
    MPID_Comm *comm_ptr;

    MPID_Comm_get_ptr( comm, comm_ptr );

    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
    
    mpi_errno = MPID_Probe(source, tag, comm_ptr, context_id, status);
    if (mpi_errno != MPI_SUCCESS) goto fn_fail;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


46
47
48
49
#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv
#undef FCNAME
#define FCNAME "MPIC_Sendrecv"
50
51
52
53
static int MPIC_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         int dest, int sendtag, void *recvbuf, int recvcount,
                         MPI_Datatype recvtype, int source, int recvtag,
                         MPI_Comm comm, MPI_Status *status)
54
55
{
    MPID_Request *recv_req_ptr=NULL, *send_req_ptr=NULL;
56
57
    int mpi_errno = MPI_SUCCESS;
    int context_id;
58
59
60
61
62
    MPID_Comm *comm_ptr = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV);

    MPIDI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPIC_SENDRECV);

63
64
65
66
67
    MPIU_ERR_CHKANDJUMP1((sendcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", sendcount);
    MPIU_ERR_CHKANDJUMP1((recvcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", recvcount);

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Irecv(recvbuf, recvcount, recvtype, source, recvtag,
                           comm_ptr, context_id, &recv_req_ptr);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    mpi_errno = MPID_Isend(sendbuf, sendcount, sendtype, dest, sendtag, 
                           comm_ptr, context_id, &send_req_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    mpi_errno = MPIC_Wait(send_req_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    
    mpi_errno = MPIC_Wait(recv_req_ptr);
    if (mpi_errno) { MPIU_ERR_POPFATAL(mpi_errno); }
    if (status != MPI_STATUS_IGNORE)
        *status = recv_req_ptr->status;
    mpi_errno = recv_req_ptr->status.MPI_ERROR;

    MPID_Request_release(send_req_ptr);
    MPID_Request_release(recv_req_ptr);
 fn_fail:
91
    /* --BEGIN ERROR HANDLING-- */
92
93
    MPIDI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPIC_SENDRECV);
    return mpi_errno;
94
    /* --END ERROR HANDLING-- */
95
96
}

97
98
99
100
101
102
103
/* NOTE: for regular collectives (as opposed to irregular collectives) calling
 * this function repeatedly will almost always be slower than performing the
 * equivalent inline because of the overhead of the repeated malloc/free */
#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_replace
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
104
105
106
107
static int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
                                 int dest, int sendtag,
                                 int source, int recvtag,
                                 MPI_Comm comm, MPI_Status *status)
108
109
110
111
112
113
114
115
116
117
118
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t context_id_offset;
    MPID_Request *sreq;
    MPID_Request *rreq;
    void *tmpbuf = NULL;
    int tmpbuf_size = 0;
    int tmpbuf_count = 0;
    MPID_Comm *comm_ptr;
    MPIU_CHKLMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE);
119
120
121
122
#ifdef MPID_LOG_ARROWS
    /* The logging macros log sendcount and recvcount */
    int sendcount = count, recvcount = count;
#endif
123
124

    MPIDI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPIC_SENDRECV_REPLACE);
125
126
127
128

    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

129
130
131
132
133
134
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id_offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    if (count > 0 && dest != MPI_PROC_NULL)
    {
135
        MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
136
137
        MPIU_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");

138
        mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = MPID_Irecv(buf, count, datatype, source, recvtag,
                           comm_ptr, context_id_offset, &rreq);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Isend(tmpbuf, tmpbuf_count, MPI_PACKED, dest,
                           sendtag, comm_ptr, context_id_offset,
                           &sreq);
    if (mpi_errno != MPI_SUCCESS)
    {
        /* --BEGIN ERROR HANDLING-- */
        /* FIXME: should we cancel the pending (possibly completed) receive request or wait for it to complete? */
        MPID_Request_release(rreq);
        MPIU_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */
    }

158
    if (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq))
159
160
161
162
    {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
163
        while (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq))
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
        {
            mpi_errno = MPID_Progress_wait(&progress_state);
            if (mpi_errno != MPI_SUCCESS)
            {
                /* --BEGIN ERROR HANDLING-- */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_POP(mpi_errno);
                /* --END ERROR HANDLING-- */
            }
        }
        MPID_Progress_end(&progress_state);
    }

    if (status != MPI_STATUS_IGNORE) {
        *status = rreq->status;
    }

    if (mpi_errno == MPI_SUCCESS) {
        mpi_errno = rreq->status.MPI_ERROR;

        if (mpi_errno == MPI_SUCCESS) {
            mpi_errno = sreq->status.MPI_ERROR;
        }
    }

    MPID_Request_release(sreq);
    MPID_Request_release(rreq);

    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPIC_SENDRECV_REPLACE);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
201
202
203
204
205

#undef FUNCNAME
#define FUNCNAME MPIR_Localcopy
#undef FCNAME
#define FCNAME "MPIR_Localcopy"
206
int MPIR_Localcopy(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
207
208
                   void *recvbuf, int recvcount, MPI_Datatype recvtype)
{
209
210
211
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
212
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
213
    MPIU_CHKLMEM_DECL(1);
214
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);
215

216
217
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY);

218
219
220
    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

221
222
223
224
    MPID_Datatype_get_size_macro(sendtype, sendsize);
    MPID_Datatype_get_size_macro(recvtype, recvsize);
    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;
225
226
227

    if (!sdata_sz || !rdata_sz)
        goto fn_exit;
228
229
    
    if (sdata_sz > rdata_sz)
230
    {
231
232
233
234
235
236
237
        MPIU_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz);
        copy_sz = rdata_sz;
    }
    else
    {
        copy_sz = sdata_sz;
    }
238

239
240
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);
241
242

    if (sendtype_iscontig && recvtype_iscontig)
243
244
245
246
247
248
249
250
251
    {
#if defined(HAVE_ERROR_CHECKING)
        MPIU_ERR_CHKMEMCPYANDJUMP(mpi_errno,
                                  ((char *)recvbuf + recvtype_true_lb),
                                  ((char *)sendbuf + sendtype_true_lb),
                                  copy_sz);
#endif
        MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb),
               ((char *) sendbuf + sendtype_true_lb),
252
               copy_sz);
253
    }
254
255
256
    else if (sendtype_iscontig)
    {
        MPID_Segment seg;
257
	MPI_Aint last;
258
259
260
261

	MPID_Segment_init(recvbuf, recvcount, recvtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_unpack(&seg, 0, &last, (char*)sendbuf + sendtype_true_lb);
262
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
263
264
265
266
    }
    else if (recvtype_iscontig)
    {
        MPID_Segment seg;
267
	MPI_Aint last;
268
269
270
271
272
273
274
275
276
277
278
279
280
281

	MPID_Segment_init(sendbuf, sendcount, sendtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_pack(&seg, 0, &last, (char*)recvbuf + recvtype_true_lb);
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else
    {
	char * buf;
	MPIDI_msg_sz_t buf_off;
	MPID_Segment sseg;
	MPIDI_msg_sz_t sfirst;
	MPID_Segment rseg;
	MPIDI_msg_sz_t rfirst;
282
283

        MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332

	MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0);
	MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0);

	sfirst = 0;
	rfirst = 0;
	buf_off = 0;
	
	while (1)
	{
	    MPI_Aint last;
	    char * buf_end;

	    if (copy_sz - sfirst > COPY_BUFFER_SZ - buf_off)
	    {
		last = sfirst + (COPY_BUFFER_SZ - buf_off);
	    }
	    else
	    {
		last = copy_sz;
	    }
	    
	    MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
	    MPIU_Assert(last > sfirst);
	    
	    buf_end = buf + buf_off + (last - sfirst);
	    sfirst = last;
	    
	    MPID_Segment_unpack(&rseg, rfirst, &last, buf);
	    MPIU_Assert(last > rfirst);

	    rfirst = last;

	    if (rfirst == copy_sz)
	    {
		/* successful completion */
		break;
	    }

            /* if the send side finished, but the recv side couldn't unpack it, there's a datatype mismatch */
            MPIU_ERR_CHKANDJUMP(sfirst == copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");        

            /* if not all data was unpacked, copy it to the front of the buffer for next time */
	    buf_off = sfirst - rfirst;
	    if (buf_off > 0)
	    {
		memmove(buf, buf_end - buf_off, buf_off);
	    }
	}
333
334
    }
    
335
    
336
  fn_exit:
337
    MPIU_CHKLMEM_FREEALL();
338
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY);
339
340
341
342
343
344
345
346
347
348
349
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPIC_Isend
#undef FCNAME
#define FCNAME "MPIC_Isend"
350
351
static int MPIC_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                      MPI_Comm comm, MPI_Request *request)
352
{
353
354
    int mpi_errno = MPI_SUCCESS;
    int context_id;
355
356
357
358
359
360
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr=NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND);

    MPIDI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPIC_ISEND);

361
362
363
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

364
365
366
367
368
369
370
371
372
373
374
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Isend(buf, count, datatype, dest, tag, comm_ptr,
                          context_id, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } 

    *request = request_ptr->handle;

 fn_fail:
375
    /* --BEGIN ERROR HANDLING-- */
376
377
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_ISEND);
    return mpi_errno;
378
    /* --END ERROR HANDLING-- */
379
380
381
}


382
383
384
385
/* FIXME: For the brief-global and finer-grain control, we must ensure that
   the global lock is *not* held when this routine is called. (unless we change
   progress_start/end to grab the lock, in which case we must *still* make
   sure that the lock is not held when this routine is called). */
386
387
388
389
390
391
#undef FUNCNAME
#define FUNCNAME MPIC_Wait
#undef FCNAME
#define FCNAME "MPIC_Wait"
int MPIC_Wait(MPID_Request * request_ptr)
{
Rajeev Thakur's avatar
Rajeev Thakur committed
392
    int mpi_errno = MPI_SUCCESS;
393
394
395
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAIT);

    MPIDI_PT2PT_FUNC_ENTER(MPID_STATE_MPIC_WAIT);
396
    if (!MPID_Request_is_complete(request_ptr))
397
398
399
400
    {
	MPID_Progress_state progress_state;
	
	MPID_Progress_start(&progress_state);
401
        while (!MPID_Request_is_complete(request_ptr))
402
403
404
405
406
407
408
409
	{
	    mpi_errno = MPID_Progress_wait(&progress_state);
	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	}
	MPID_Progress_end(&progress_state);
    }

 fn_fail:
410
    /* --BEGIN ERROR HANDLING-- */
411
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_WAIT);
412
    return mpi_errno;
413
    /* --END ERROR HANDLING-- */
414
}
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440


/* Fault-tolerance versions.  When a process fails, collectives will
   still complete, however the result may be invalid.  Processes
   directly communicating with the failed process can detect the
   failure, however another mechanism is needed to commuinicate the
   failure to other processes receiving the invalid data.  To do this
   we introduce the _ft versions of the MPIC_ helper functions.  These
   functions take a pointer to an error flag.  When this is set to
   TRUE, the send functions will communicate the failure to the
   receiver.  If a function detects a failure, either by getting a
   failure in the communication operation, or by receiving an error
   indicator from a remote process, it sets the error flag to TRUE.

   In this implementation, we indicate an error to a remote process by
   sending an empty message instead of the requested buffer.  When a
   process receives an empty message, it knows to set the error flag.
   We count on the fact that collectives that exchange data (as
   opposed to barrier) will never send an empty message.  The barrier
   collective will not communicate failure information this way, but
   this is OK since there is no data that can be received corrupted. */

#undef FUNCNAME
#define FUNCNAME MPIC_Send_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
441
int MPIC_Send_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
442
443
444
                 MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
445
446
447
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
448
449
450
451
452
453
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

454
455
456
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

457
    if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
458
459
        MPIR_TAG_SET_ERROR_BIT(tag);

460
461
462
463
464
465
466
467
468
469
470
471
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Send(buf, count, datatype, dest, tag, comm_ptr,
                          context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_Request_release(request_ptr);
    }
472

473
 fn_exit:
474
475
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SEND_FT);
    return mpi_errno;
476
 fn_fail:
477
478
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
479
    goto fn_exit;
480
    /* --END ERROR HANDLING-- */
481
482
483
484
485
486
487
488
489
490
}

#undef FUNCNAME
#define FUNCNAME MPIC_Recv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
                 MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
491
    int context_id;
492
    MPI_Status mystatus;
493
494
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
495
496
497
498
499
500
    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

501
502
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);
503

504
505
506
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
507

508
509
    if (status == MPI_STATUS_IGNORE)
        status = &mystatus;
510

511
512
    mpi_errno = MPID_Recv(buf, count, datatype, source, tag, comm_ptr,
                          context_id, status, &request_ptr);
513
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
514
515
516
517
518
519
520
521
522
523
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno == MPI_SUCCESS) {
            *status = request_ptr->status;
            mpi_errno = request_ptr->status.MPI_ERROR;
        } else {
            MPIU_ERR_POP(mpi_errno);
        }
        MPID_Request_release(request_ptr);
    }
524

525
    if (!MPIR_PARAM_ENABLE_COLL_FT_RET) goto fn_exit;
526
527

    if (source != MPI_PROC_NULL) {
528
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
529
            *errflag = TRUE;
530
531
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
532
533
534
535
536
537
538
539
540
            MPIU_Assert(status->MPI_TAG == tag);
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV_FT);
    return mpi_errno;
 fn_fail:
541
542
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
543
544
545
546
547
548
549
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Ssend_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
550
int MPIC_Ssend_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
551
552
553
                  MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
554
555
556
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
557
558
559
560
561
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SSEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
562
563
564
565
566
567
568
569

    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
            "**countneg", "**countneg %d", count);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

570
    if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
571
572
        MPIR_TAG_SET_ERROR_BIT(tag);

573
574
575
576
577
578
579
580
    mpi_errno = MPID_Ssend(buf, count, datatype, dest, tag, comm_ptr,
                           context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_Request_release(request_ptr);
    }
581

582
 fn_exit:
583
584
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SSEND_FT);
    return mpi_errno;
585
 fn_fail:
586
587
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
588
    goto fn_exit;
589
    /* --END ERROR HANDLING-- */
590
591
592
593
594
595
}

#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
596
int MPIC_Sendrecv_ft(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
597
598
599
600
601
                     int dest, int sendtag, void *recvbuf, int recvcount,
                     MPI_Datatype recvtype, int source, int recvtag,
                     MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
602
    int context_id;
603
    MPI_Status mystatus;
604
605
    MPID_Request *recv_req_ptr = NULL, *send_req_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
606
607
608
609
610
611
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

612
613
614
615
616
617
618
619
620
    MPIU_ERR_CHKANDJUMP1((sendcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", sendcount);
    MPIU_ERR_CHKANDJUMP1((recvcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", recvcount);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

621
622
623
    if (MPIR_PARAM_ENABLE_COLL_FT_RET) {
        if (status == MPI_STATUS_IGNORE) status = &mystatus;
        if (*errflag) MPIR_TAG_SET_ERROR_BIT(sendtag);
624
    }
625

626
627
    mpi_errno = MPID_Irecv(recvbuf, recvcount, recvtype, source, recvtag,
                           comm_ptr, context_id, &recv_req_ptr);
628
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
    mpi_errno = MPID_Isend(sendbuf, sendcount, sendtype, dest, recvtag,
                           comm_ptr, context_id, &send_req_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIC_Wait(send_req_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPIC_Wait(recv_req_ptr);
    if (mpi_errno) MPIU_ERR_POPFATAL(mpi_errno);

    *status = recv_req_ptr->status;
    mpi_errno = recv_req_ptr->status.MPI_ERROR;

    MPID_Request_release(send_req_ptr);
    MPID_Request_release(recv_req_ptr);

    if (!MPIR_PARAM_ENABLE_COLL_FT_RET) goto fn_exit;
645
646

    if (source != MPI_PROC_NULL) {
647
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
648
            *errflag = TRUE;
649
650
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }
    
 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");

    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_replace_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
                             int dest, int sendtag,
                             int source, int recvtag,
                             MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Status mystatus;
675
676
677
678
679
680
681
682
    MPIR_Context_id_t context_id_offset;
    MPID_Request *sreq;
    MPID_Request *rreq;
    void *tmpbuf = NULL;
    int tmpbuf_size = 0;
    int tmpbuf_count = 0;
    MPID_Comm *comm_ptr;
    MPIU_CHKLMEM_DECL(1);
683
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
684
685
686
687
#ifdef MPID_LOG_ARROWS
    /* The logging macros log sendcount and recvcount */
    int sendcount = count, recvcount = count;
#endif
688
689
690
691
692

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

693
694
695
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

696
697
698
    if (MPIR_PARAM_ENABLE_COLL_FT_RET) {
        if (status == MPI_STATUS_IGNORE) status = &mystatus;
        if (*errflag) MPIR_TAG_SET_ERROR_BIT(sendtag);
699
700
    }

701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id_offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    if (count > 0 && dest != MPI_PROC_NULL) {
        MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
        MPIU_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");

        mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = MPID_Irecv(buf, count, datatype, source, recvtag,
                           comm_ptr, context_id_offset, &rreq);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Isend(tmpbuf, tmpbuf_count, MPI_PACKED, dest,
                           sendtag, comm_ptr, context_id_offset, &sreq);
    if (mpi_errno != MPI_SUCCESS) {
        /* --BEGIN ERROR HANDLING-- */
        /* FIXME: should we cancel the pending (possibly completed) receive
         * request or wait for it to complete? */
        MPID_Request_release(rreq);
        MPIU_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */
    }

    if (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq)) {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
        while (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq)) {
            mpi_errno = MPID_Progress_wait(&progress_state);
            if (mpi_errno != MPI_SUCCESS) {
                /* --BEGIN ERROR HANDLING-- */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_POP(mpi_errno);
                /* --END ERROR HANDLING-- */
            }
        }
        MPID_Progress_end(&progress_state);
    }

    *status = rreq->status;

    if (mpi_errno == MPI_SUCCESS) {
        mpi_errno = rreq->status.MPI_ERROR;

        if (mpi_errno == MPI_SUCCESS) {
            mpi_errno = sreq->status.MPI_ERROR;
        }
    }

    MPID_Request_release(sreq);
    MPID_Request_release(rreq);

757
    if (!MPIR_PARAM_ENABLE_COLL_FT_RET) goto fn_exit;
758
759
760
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    
    if (source != MPI_PROC_NULL) {
761
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
762
            *errflag = TRUE;
763
764
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
765
766
767
768
769
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }

 fn_exit:
770
    MPIU_CHKLMEM_FREEALL();
771
772
773
774
775
776
777
778
779
780
781
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Isend_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
782
int MPIC_Isend_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
783
784
785
786
787
788
789
790
791
                  MPI_Comm comm, MPI_Request *request, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

792
    if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
793
794
795
        MPIR_TAG_SET_ERROR_BIT(tag);

    mpi_errno = MPIC_Isend(buf, count, datatype, dest, tag, comm, request);
796

797
 fn_exit:
798
799
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISEND_FT);
    return mpi_errno;
800
801
 fn_fail:
    goto fn_exit;
802
803
804
805
806
807
808
809
810
811
}

#undef FUNCNAME
#define FUNCNAME MPIC_Irecv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
                  int tag, MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
812
813
814
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
815
816
817
818
    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);

819
820
821
822
823
824
825
826
827
828
829
830
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Irecv(buf, count, datatype, source, tag, comm_ptr,
            context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    *request = request_ptr->handle;
831

832
 fn_exit:
833
834
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
    return mpi_errno;
835
836
 fn_fail:
    goto fn_exit;
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
}


#undef FUNCNAME
#define FUNCNAME MPIC_Waitall_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAITALL_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_WAITALL_FT);

    MPIU_Assert(statuses != MPI_STATUSES_IGNORE);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

856
857
858
    /* The MPI_TAG field is not set for send operations, so if we want
       to check for the error bit in the tag below, we should initialize all
       tag fields here. */
859
860
861
    for (i = 0; i < numreq; ++i)
        statuses[i].MPI_TAG = 0;
    
862
863
864
    mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

865
    if (*errflag || !MPIR_PARAM_ENABLE_COLL_FT_RET)
866
867
868
        goto fn_exit;

    for (i = 0; i < numreq; ++i) {
869
        if (MPIR_TAG_CHECK_ERROR_BIT(statuses[i].MPI_TAG)) {
870
            *errflag = TRUE;
871
            MPIR_TAG_CLEAR_ERROR_BIT(statuses[i].MPI_TAG);
872
873
874
875
876
877
878
879
880
881
882
            break;
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_WAITALL_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}