helper_fns.c 28.8 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2
3
4
5
6
7
8
9
10
/*
 *
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"
#include "datatype.h"

11
12
#define COPY_BUFFER_SZ 16384

13
14
15
16
17
/* These functions are used in the implementation of collective
   operations. They are wrappers around MPID send/recv functions. They do
   sends/receives by setting the context offset to
   MPID_CONTEXT_INTRA_COLL or MPID_CONTEXT_INTER_COLL. */

18
19
20
21
22
static int MPIC_Irecv(void *buf, int count, MPI_Datatype datatype, int
                     source, int tag, MPI_Comm comm, MPI_Request *request);
static int MPIC_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                     MPI_Comm comm, MPI_Request *request);

23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#undef FUNCNAME
#define FUNCNAME MPIC_Probe
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    int context_id;
    MPID_Comm *comm_ptr;

    MPID_Comm_get_ptr( comm, comm_ptr );

    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
    
    mpi_errno = MPID_Probe(source, tag, comm_ptr, context_id, status);
    if (mpi_errno != MPI_SUCCESS) goto fn_fail;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


48
49
50
51
#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv
#undef FCNAME
#define FCNAME "MPIC_Sendrecv"
52
53
54
55
static int MPIC_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         int dest, int sendtag, void *recvbuf, int recvcount,
                         MPI_Datatype recvtype, int source, int recvtag,
                         MPI_Comm comm, MPI_Status *status)
56
57
{
    MPID_Request *recv_req_ptr=NULL, *send_req_ptr=NULL;
58
59
    int mpi_errno = MPI_SUCCESS;
    int context_id;
60
61
62
63
64
    MPID_Comm *comm_ptr = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV);

    MPIDI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPIC_SENDRECV);

65
66
67
68
69
    MPIU_ERR_CHKANDJUMP1((sendcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", sendcount);
    MPIU_ERR_CHKANDJUMP1((recvcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", recvcount);

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Irecv(recvbuf, recvcount, recvtype, source, recvtag,
                           comm_ptr, context_id, &recv_req_ptr);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    mpi_errno = MPID_Isend(sendbuf, sendcount, sendtype, dest, sendtag, 
                           comm_ptr, context_id, &send_req_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    mpi_errno = MPIC_Wait(send_req_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    
    mpi_errno = MPIC_Wait(recv_req_ptr);
    if (mpi_errno) { MPIU_ERR_POPFATAL(mpi_errno); }
    if (status != MPI_STATUS_IGNORE)
        *status = recv_req_ptr->status;
    mpi_errno = recv_req_ptr->status.MPI_ERROR;

    MPID_Request_release(send_req_ptr);
    MPID_Request_release(recv_req_ptr);
 fn_fail:
93
    /* --BEGIN ERROR HANDLING-- */
94
95
    MPIDI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPIC_SENDRECV);
    return mpi_errno;
96
    /* --END ERROR HANDLING-- */
97
98
}

99
100
101
102
103
104
105
/* NOTE: for regular collectives (as opposed to irregular collectives) calling
 * this function repeatedly will almost always be slower than performing the
 * equivalent inline because of the overhead of the repeated malloc/free */
#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_replace
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
106
107
108
109
static int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
                                 int dest, int sendtag,
                                 int source, int recvtag,
                                 MPI_Comm comm, MPI_Status *status)
110
111
112
113
114
115
116
117
118
119
120
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t context_id_offset;
    MPID_Request *sreq;
    MPID_Request *rreq;
    void *tmpbuf = NULL;
    int tmpbuf_size = 0;
    int tmpbuf_count = 0;
    MPID_Comm *comm_ptr;
    MPIU_CHKLMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE);
121
122
123
124
#ifdef MPID_LOG_ARROWS
    /* The logging macros log sendcount and recvcount */
    int sendcount = count, recvcount = count;
#endif
125
126

    MPIDI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPIC_SENDRECV_REPLACE);
127
128
129
130

    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

131
132
133
134
135
136
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id_offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    if (count > 0 && dest != MPI_PROC_NULL)
    {
137
        MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
138
139
        MPIU_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");

140
        mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = MPID_Irecv(buf, count, datatype, source, recvtag,
                           comm_ptr, context_id_offset, &rreq);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Isend(tmpbuf, tmpbuf_count, MPI_PACKED, dest,
                           sendtag, comm_ptr, context_id_offset,
                           &sreq);
    if (mpi_errno != MPI_SUCCESS)
    {
        /* --BEGIN ERROR HANDLING-- */
        /* FIXME: should we cancel the pending (possibly completed) receive request or wait for it to complete? */
        MPID_Request_release(rreq);
        MPIU_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */
    }

160
    if (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq))
161
162
163
164
    {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
165
        while (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq))
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
        {
            mpi_errno = MPID_Progress_wait(&progress_state);
            if (mpi_errno != MPI_SUCCESS)
            {
                /* --BEGIN ERROR HANDLING-- */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_POP(mpi_errno);
                /* --END ERROR HANDLING-- */
            }
        }
        MPID_Progress_end(&progress_state);
    }

    if (status != MPI_STATUS_IGNORE) {
        *status = rreq->status;
    }

    if (mpi_errno == MPI_SUCCESS) {
        mpi_errno = rreq->status.MPI_ERROR;

        if (mpi_errno == MPI_SUCCESS) {
            mpi_errno = sreq->status.MPI_ERROR;
        }
    }

    MPID_Request_release(sreq);
    MPID_Request_release(rreq);

    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPIC_SENDRECV_REPLACE);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
203
204
205
206
207

#undef FUNCNAME
#define FUNCNAME MPIR_Localcopy
#undef FCNAME
#define FCNAME "MPIR_Localcopy"
208
int MPIR_Localcopy(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
209
210
                   void *recvbuf, int recvcount, MPI_Datatype recvtype)
{
211
212
213
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
214
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
215
    MPIU_CHKLMEM_DECL(1);
216
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);
217

218
219
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY);

220
221
222
    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

223
224
225
226
    MPID_Datatype_get_size_macro(sendtype, sendsize);
    MPID_Datatype_get_size_macro(recvtype, recvsize);
    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;
227
228
229

    if (!sdata_sz || !rdata_sz)
        goto fn_exit;
230
231
    
    if (sdata_sz > rdata_sz)
232
    {
233
234
235
236
237
238
239
        MPIU_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz);
        copy_sz = rdata_sz;
    }
    else
    {
        copy_sz = sdata_sz;
    }
240

241
242
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);
243
244

    if (sendtype_iscontig && recvtype_iscontig)
245
246
247
248
249
250
251
252
253
    {
#if defined(HAVE_ERROR_CHECKING)
        MPIU_ERR_CHKMEMCPYANDJUMP(mpi_errno,
                                  ((char *)recvbuf + recvtype_true_lb),
                                  ((char *)sendbuf + sendtype_true_lb),
                                  copy_sz);
#endif
        MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb),
               ((char *) sendbuf + sendtype_true_lb),
254
               copy_sz);
255
    }
256
257
258
    else if (sendtype_iscontig)
    {
        MPID_Segment seg;
259
	MPI_Aint last;
260
261
262
263

	MPID_Segment_init(recvbuf, recvcount, recvtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_unpack(&seg, 0, &last, (char*)sendbuf + sendtype_true_lb);
264
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
265
266
267
268
    }
    else if (recvtype_iscontig)
    {
        MPID_Segment seg;
269
	MPI_Aint last;
270
271
272
273
274
275
276
277
278
279
280
281
282
283

	MPID_Segment_init(sendbuf, sendcount, sendtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_pack(&seg, 0, &last, (char*)recvbuf + recvtype_true_lb);
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else
    {
	char * buf;
	MPIDI_msg_sz_t buf_off;
	MPID_Segment sseg;
	MPIDI_msg_sz_t sfirst;
	MPID_Segment rseg;
	MPIDI_msg_sz_t rfirst;
284
285

        MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334

	MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0);
	MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0);

	sfirst = 0;
	rfirst = 0;
	buf_off = 0;
	
	while (1)
	{
	    MPI_Aint last;
	    char * buf_end;

	    if (copy_sz - sfirst > COPY_BUFFER_SZ - buf_off)
	    {
		last = sfirst + (COPY_BUFFER_SZ - buf_off);
	    }
	    else
	    {
		last = copy_sz;
	    }
	    
	    MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
	    MPIU_Assert(last > sfirst);
	    
	    buf_end = buf + buf_off + (last - sfirst);
	    sfirst = last;
	    
	    MPID_Segment_unpack(&rseg, rfirst, &last, buf);
	    MPIU_Assert(last > rfirst);

	    rfirst = last;

	    if (rfirst == copy_sz)
	    {
		/* successful completion */
		break;
	    }

            /* if the send side finished, but the recv side couldn't unpack it, there's a datatype mismatch */
            MPIU_ERR_CHKANDJUMP(sfirst == copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");        

            /* if not all data was unpacked, copy it to the front of the buffer for next time */
	    buf_off = sfirst - rfirst;
	    if (buf_off > 0)
	    {
		memmove(buf, buf_end - buf_off, buf_off);
	    }
	}
335
336
    }
    
337
    
338
  fn_exit:
339
    MPIU_CHKLMEM_FREEALL();
340
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY);
341
342
343
344
345
346
347
348
349
350
351
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPIC_Isend
#undef FCNAME
#define FCNAME "MPIC_Isend"
352
353
static int MPIC_Isend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                      MPI_Comm comm, MPI_Request *request)
354
{
355
356
    int mpi_errno = MPI_SUCCESS;
    int context_id;
357
358
359
360
361
362
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr=NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND);

    MPIDI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPIC_ISEND);

363
364
365
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

366
367
368
369
370
371
372
373
374
375
376
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Isend(buf, count, datatype, dest, tag, comm_ptr,
                          context_id, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } 

    *request = request_ptr->handle;

 fn_fail:
377
    /* --BEGIN ERROR HANDLING-- */
378
379
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_ISEND);
    return mpi_errno;
380
    /* --END ERROR HANDLING-- */
381
382
383
384
385
386
387
}


#undef FUNCNAME
#define FUNCNAME MPIC_Irecv
#undef FCNAME
#define FCNAME "MPIC_Irecv"
388
389
static int MPIC_Irecv(void *buf, int count, MPI_Datatype datatype, int
                    source, int tag, MPI_Comm comm, MPI_Request *request)
390
{
391
392
    int mpi_errno = MPI_SUCCESS;
    int context_id;
393
394
395
396
397
398
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV);

    MPIDI_PT2PT_FUNC_ENTER_BACK(MPID_STATE_MPIC_IRECV);

399
400
401
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

402
403
404
405
406
407
408
409
410
411
412
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Irecv(buf, count, datatype, source, tag, comm_ptr,
                          context_id, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    *request = request_ptr->handle;

 fn_fail:
413
    /* --BEGIN ERROR HANDLING-- */
414
415
    MPIDI_PT2PT_FUNC_EXIT_BACK(MPID_STATE_MPIC_IRECV);
    return mpi_errno;
416
    /* --END ERROR HANDLING-- */
417
418
}

419
420
421
422
/* FIXME: For the brief-global and finer-grain control, we must ensure that
   the global lock is *not* held when this routine is called. (unless we change
   progress_start/end to grab the lock, in which case we must *still* make
   sure that the lock is not held when this routine is called). */
423
424
425
426
427
428
#undef FUNCNAME
#define FUNCNAME MPIC_Wait
#undef FCNAME
#define FCNAME "MPIC_Wait"
int MPIC_Wait(MPID_Request * request_ptr)
{
Rajeev Thakur's avatar
Rajeev Thakur committed
429
    int mpi_errno = MPI_SUCCESS;
430
431
432
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAIT);

    MPIDI_PT2PT_FUNC_ENTER(MPID_STATE_MPIC_WAIT);
433
    if (!MPID_Request_is_complete(request_ptr))
434
435
436
437
    {
	MPID_Progress_state progress_state;
	
	MPID_Progress_start(&progress_state);
438
        while (!MPID_Request_is_complete(request_ptr))
439
440
441
442
443
444
445
446
	{
	    mpi_errno = MPID_Progress_wait(&progress_state);
	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	}
	MPID_Progress_end(&progress_state);
    }

 fn_fail:
447
    /* --BEGIN ERROR HANDLING-- */
448
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_WAIT);
449
    return mpi_errno;
450
    /* --END ERROR HANDLING-- */
451
}
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477


/* Fault-tolerance versions.  When a process fails, collectives will
   still complete, however the result may be invalid.  Processes
   directly communicating with the failed process can detect the
   failure, however another mechanism is needed to commuinicate the
   failure to other processes receiving the invalid data.  To do this
   we introduce the _ft versions of the MPIC_ helper functions.  These
   functions take a pointer to an error flag.  When this is set to
   TRUE, the send functions will communicate the failure to the
   receiver.  If a function detects a failure, either by getting a
   failure in the communication operation, or by receiving an error
   indicator from a remote process, it sets the error flag to TRUE.

   In this implementation, we indicate an error to a remote process by
   sending an empty message instead of the requested buffer.  When a
   process receives an empty message, it knows to set the error flag.
   We count on the fact that collectives that exchange data (as
   opposed to barrier) will never send an empty message.  The barrier
   collective will not communicate failure information this way, but
   this is OK since there is no data that can be received corrupted. */

#undef FUNCNAME
#define FUNCNAME MPIC_Send_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
478
int MPIC_Send_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
479
480
481
                 MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
482
483
484
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
485
486
487
488
489
490
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

491
492
493
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

494
    if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
495
496
        MPIR_TAG_SET_ERROR_BIT(tag);

497
498
499
500
501
502
503
504
505
506
507
508
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Send(buf, count, datatype, dest, tag, comm_ptr,
                          context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_Request_release(request_ptr);
    }
509

510
 fn_exit:
511
512
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SEND_FT);
    return mpi_errno;
513
 fn_fail:
514
515
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
516
    goto fn_exit;
517
    /* --END ERROR HANDLING-- */
518
519
520
521
522
523
524
525
526
527
}

#undef FUNCNAME
#define FUNCNAME MPIC_Recv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
                 MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
528
    int context_id;
529
    MPI_Status mystatus;
530
531
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
532
533
534
535
536
537
    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

538
539
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);
540

541
542
543
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
544

545
546
    if (status == MPI_STATUS_IGNORE)
        status = &mystatus;
547

548
549
    mpi_errno = MPID_Recv(buf, count, datatype, source, tag, comm_ptr,
                          context_id, status, &request_ptr);
550
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
551
552
553
554
555
556
557
558
559
560
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno == MPI_SUCCESS) {
            *status = request_ptr->status;
            mpi_errno = request_ptr->status.MPI_ERROR;
        } else {
            MPIU_ERR_POP(mpi_errno);
        }
        MPID_Request_release(request_ptr);
    }
561

562
    if (!MPIR_PARAM_ENABLE_COLL_FT_RET) goto fn_exit;
563
564

    if (source != MPI_PROC_NULL) {
565
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
566
            *errflag = TRUE;
567
568
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
569
570
571
572
573
574
575
576
577
            MPIU_Assert(status->MPI_TAG == tag);
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV_FT);
    return mpi_errno;
 fn_fail:
578
579
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
580
581
582
583
584
585
586
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Ssend_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
587
int MPIC_Ssend_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
588
589
590
                  MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
591
592
593
    int context_id;
    MPID_Request *request_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
594
595
596
597
598
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SSEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
599
600
601
602
603
604
605
606

    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
            "**countneg", "**countneg %d", count);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

607
    if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
608
609
        MPIR_TAG_SET_ERROR_BIT(tag);

610
611
612
613
614
615
616
617
    mpi_errno = MPID_Ssend(buf, count, datatype, dest, tag, comm_ptr,
                           context_id, &request_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_Request_release(request_ptr);
    }
618

619
 fn_exit:
620
621
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SSEND_FT);
    return mpi_errno;
622
 fn_fail:
623
624
    /* --BEGIN ERROR HANDLING-- */
    if (request_ptr) MPID_Request_release(request_ptr);
625
    goto fn_exit;
626
    /* --END ERROR HANDLING-- */
627
628
629
630
631
632
}

#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
633
int MPIC_Sendrecv_ft(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
634
635
636
637
638
                     int dest, int sendtag, void *recvbuf, int recvcount,
                     MPI_Datatype recvtype, int source, int recvtag,
                     MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
639
    int context_id;
640
    MPI_Status mystatus;
641
642
    MPID_Request *recv_req_ptr = NULL, *send_req_ptr = NULL;
    MPID_Comm *comm_ptr = NULL;
643
644
645
646
647
648
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

649
650
651
652
653
654
655
656
657
    MPIU_ERR_CHKANDJUMP1((sendcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", sendcount);
    MPIU_ERR_CHKANDJUMP1((recvcount < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", recvcount);

    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

658
659
660
    if (MPIR_PARAM_ENABLE_COLL_FT_RET) {
        if (status == MPI_STATUS_IGNORE) status = &mystatus;
        if (*errflag) MPIR_TAG_SET_ERROR_BIT(sendtag);
661
    }
662

663
664
    mpi_errno = MPID_Irecv(recvbuf, recvcount, recvtype, source, recvtag,
                           comm_ptr, context_id, &recv_req_ptr);
665
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
    mpi_errno = MPID_Isend(sendbuf, sendcount, sendtype, dest, recvtag,
                           comm_ptr, context_id, &send_req_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPIC_Wait(send_req_ptr);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPIC_Wait(recv_req_ptr);
    if (mpi_errno) MPIU_ERR_POPFATAL(mpi_errno);

    *status = recv_req_ptr->status;
    mpi_errno = recv_req_ptr->status.MPI_ERROR;

    MPID_Request_release(send_req_ptr);
    MPID_Request_release(recv_req_ptr);

    if (!MPIR_PARAM_ENABLE_COLL_FT_RET) goto fn_exit;
682
683

    if (source != MPI_PROC_NULL) {
684
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
685
            *errflag = TRUE;
686
687
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }
    
 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");

    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_replace_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
                             int dest, int sendtag,
                             int source, int recvtag,
                             MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Status mystatus;
712
713
714
715
716
717
718
719
    MPIR_Context_id_t context_id_offset;
    MPID_Request *sreq;
    MPID_Request *rreq;
    void *tmpbuf = NULL;
    int tmpbuf_size = 0;
    int tmpbuf_count = 0;
    MPID_Comm *comm_ptr;
    MPIU_CHKLMEM_DECL(1);
720
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
721
722
723
724
#ifdef MPID_LOG_ARROWS
    /* The logging macros log sendcount and recvcount */
    int sendcount = count, recvcount = count;
#endif
725
726
727
728
729

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

730
731
732
    MPIU_ERR_CHKANDJUMP1((count < 0), mpi_errno, MPI_ERR_COUNT,
                         "**countneg", "**countneg %d", count);

733
734
735
    if (MPIR_PARAM_ENABLE_COLL_FT_RET) {
        if (status == MPI_STATUS_IGNORE) status = &mystatus;
        if (*errflag) MPIR_TAG_SET_ERROR_BIT(sendtag);
736
737
    }

738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
    MPID_Comm_get_ptr(comm, comm_ptr);
    context_id_offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    if (count > 0 && dest != MPI_PROC_NULL) {
        MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
        MPIU_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");

        mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = MPID_Irecv(buf, count, datatype, source, recvtag,
                           comm_ptr, context_id_offset, &rreq);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Isend(tmpbuf, tmpbuf_count, MPI_PACKED, dest,
                           sendtag, comm_ptr, context_id_offset, &sreq);
    if (mpi_errno != MPI_SUCCESS) {
        /* --BEGIN ERROR HANDLING-- */
        /* FIXME: should we cancel the pending (possibly completed) receive
         * request or wait for it to complete? */
        MPID_Request_release(rreq);
        MPIU_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */
    }

    if (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq)) {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
        while (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq)) {
            mpi_errno = MPID_Progress_wait(&progress_state);
            if (mpi_errno != MPI_SUCCESS) {
                /* --BEGIN ERROR HANDLING-- */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_POP(mpi_errno);
                /* --END ERROR HANDLING-- */
            }
        }
        MPID_Progress_end(&progress_state);
    }

    *status = rreq->status;

    if (mpi_errno == MPI_SUCCESS) {
        mpi_errno = rreq->status.MPI_ERROR;

        if (mpi_errno == MPI_SUCCESS) {
            mpi_errno = sreq->status.MPI_ERROR;
        }
    }

    MPID_Request_release(sreq);
    MPID_Request_release(rreq);

794
    if (!MPIR_PARAM_ENABLE_COLL_FT_RET) goto fn_exit;
795
796
797
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    
    if (source != MPI_PROC_NULL) {
798
        if (MPIR_TAG_CHECK_ERROR_BIT(status->MPI_TAG)) {
799
            *errflag = TRUE;
800
801
            MPIR_TAG_CLEAR_ERROR_BIT(status->MPI_TAG);
        } else {
802
803
804
805
806
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }

 fn_exit:
807
    MPIU_CHKLMEM_FREEALL();
808
809
810
811
812
813
814
815
816
817
818
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Isend_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
819
int MPIC_Isend_ft(const void *buf, int count, MPI_Datatype datatype, int dest, int tag,
820
821
822
823
824
825
826
827
828
                  MPI_Comm comm, MPI_Request *request, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

829
    if (*errflag && MPIR_PARAM_ENABLE_COLL_FT_RET)
830
831
832
        MPIR_TAG_SET_ERROR_BIT(tag);

    mpi_errno = MPIC_Isend(buf, count, datatype, dest, tag, comm, request);
833

834
 fn_exit:
835
836
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISEND_FT);
    return mpi_errno;
837
838
 fn_fail:
    goto fn_exit;
839
840
841
842
843
844
845
846
847
848
849
850
851
852
}

#undef FUNCNAME
#define FUNCNAME MPIC_Irecv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
                  int tag, MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);

853
    mpi_errno = MPIC_Irecv(buf, count, datatype, source, tag, comm, request);
854

855
 fn_exit:
856
857
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
    return mpi_errno;
858
859
 fn_fail:
    goto fn_exit;
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
}


#undef FUNCNAME
#define FUNCNAME MPIC_Waitall_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAITALL_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_WAITALL_FT);

    MPIU_Assert(statuses != MPI_STATUSES_IGNORE);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

879
880
881
    /* The MPI_TAG field is not set for send operations, so if we want
       to check for the error bit in the tag below, we should initialize all
       tag fields here. */
882
883
884
    for (i = 0; i < numreq; ++i)
        statuses[i].MPI_TAG = 0;
    
885
886
887
    mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

888
    if (*errflag || !MPIR_PARAM_ENABLE_COLL_FT_RET)
889
890
891
        goto fn_exit;

    for (i = 0; i < numreq; ++i) {
892
        if (MPIR_TAG_CHECK_ERROR_BIT(statuses[i].MPI_TAG)) {
893
            *errflag = TRUE;
894
            MPIR_TAG_CLEAR_ERROR_BIT(statuses[i].MPI_TAG);
895
896
897
898
899
900
901
902
903
904
905
            break;
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_WAITALL_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}