helper_fns.c 25.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"
#include "datatype.h"

11
12
#define COPY_BUFFER_SZ 16384

13
14
15
16
17
/* These functions are used in the implementation of collective
   operations. They are wrappers around MPID send/recv functions. They do
   sends/receives by setting the context offset to
   MPID_CONTEXT_INTRA_COLL or MPID_CONTEXT_INTER_COLL. */

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#undef FUNCNAME
#define FUNCNAME MPIC_Probe
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Probe(int source, int tag, MPI_Comm comm, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    int context_id;
    MPID_Comm *comm_ptr;

    MPID_Comm_get_ptr( comm, comm_ptr );

    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;
    
    mpi_errno = MPID_Probe(source, tag, comm_ptr, context_id, status);
    if (mpi_errno != MPI_SUCCESS) goto fn_fail;

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#undef FUNCNAME
#define FUNCNAME MPIC_Send
#undef FCNAME
#define FCNAME "MPIC_Send"
int MPIC_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
              MPI_Comm comm)
{
    int mpi_errno, context_id;
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr=NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND);

    MPIDI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPIC_SEND);

    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Send(buf, count, datatype, dest, tag, comm_ptr,
                          context_id, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	MPID_Request_release(request_ptr);
    }
 fn_exit:
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_SEND);
    return mpi_errno;
 fn_fail:
73
    /* --BEGIN ERROR HANDLING-- */
74
75
76
77
    if (request_ptr) {
        MPID_Request_release(request_ptr);
    }
    goto fn_exit;
78
    /* --END ERROR HANDLING-- */
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
}

#undef FUNCNAME
#define FUNCNAME MPIC_Recv
#undef FCNAME
#define FCNAME "MPIC_Recv"
int MPIC_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag,
	     MPI_Comm comm, MPI_Status *status)
{
    int mpi_errno, context_id;
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV);

    MPIDI_PT2PT_FUNC_ENTER_BACK(MPID_STATE_MPIC_RECV);

    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Recv(buf, count, datatype, source, tag, comm_ptr,
                          context_id, status, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
	if (mpi_errno == MPI_SUCCESS) {
	    if (status != MPI_STATUS_IGNORE) {
		*status = request_ptr->status;
	    }
	    mpi_errno = request_ptr->status.MPI_ERROR;
	}
	else { MPIU_ERR_POP(mpi_errno); }

        MPID_Request_release(request_ptr);
    }
 fn_exit:
    MPIDI_PT2PT_FUNC_EXIT_BACK(MPID_STATE_MPIC_RECV);
    return mpi_errno;
 fn_fail:
118
    /* --BEGIN ERROR HANDLING-- */
119
120
121
122
    if (request_ptr) { 
	MPID_Request_release(request_ptr);
    }
    goto fn_exit;
123
    /* --END ERROR HANDLING-- */
124
125
}

126
127
128
129
130
131
132
133
134
135
#undef FUNCNAME
#define FUNCNAME MPIC_Ssend
#undef FCNAME
#define FCNAME "MPIC_Ssend"
int MPIC_Ssend(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
               MPI_Comm comm)
{
    int mpi_errno, context_id;
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr=NULL;
136
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND);
137

138
    MPIDI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPIC_SSEND);
139
140
141
142
143
144
145
146
147
148
149
150
151
152

    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Ssend(buf, count, datatype, dest, tag, comm_ptr,
                           context_id, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    if (request_ptr) {
        mpi_errno = MPIC_Wait(request_ptr);
	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	MPID_Request_release(request_ptr);
    }
 fn_exit:
153
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_SSEND);
154
155
    return mpi_errno;
 fn_fail:
156
    /* --BEGIN ERROR HANDLING-- */
157
158
159
160
    if (request_ptr) {
        MPID_Request_release(request_ptr);
    }
    goto fn_exit;
161
    /* --END ERROR HANDLING-- */
162
163
}

164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv
#undef FCNAME
#define FCNAME "MPIC_Sendrecv"
int MPIC_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                  int dest, int sendtag, void *recvbuf, int recvcount,
                  MPI_Datatype recvtype, int source, int recvtag,
                  MPI_Comm comm, MPI_Status *status) 
{
    MPID_Request *recv_req_ptr=NULL, *send_req_ptr=NULL;
    int mpi_errno, context_id;
    MPID_Comm *comm_ptr = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV);

    MPIDI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPIC_SENDRECV);

    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Irecv(recvbuf, recvcount, recvtype, source, recvtag,
                           comm_ptr, context_id, &recv_req_ptr);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    mpi_errno = MPID_Isend(sendbuf, sendcount, sendtype, dest, sendtag, 
                           comm_ptr, context_id, &send_req_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    mpi_errno = MPIC_Wait(send_req_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    
    mpi_errno = MPIC_Wait(recv_req_ptr);
    if (mpi_errno) { MPIU_ERR_POPFATAL(mpi_errno); }
    if (status != MPI_STATUS_IGNORE)
        *status = recv_req_ptr->status;
    mpi_errno = recv_req_ptr->status.MPI_ERROR;

    MPID_Request_release(send_req_ptr);
    MPID_Request_release(recv_req_ptr);
 fn_fail:
203
    /* --BEGIN ERROR HANDLING-- */
204
205
    MPIDI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPIC_SENDRECV);
    return mpi_errno;
206
    /* --END ERROR HANDLING-- */
207
208
}

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
/* NOTE: for regular collectives (as opposed to irregular collectives) calling
 * this function repeatedly will almost always be slower than performing the
 * equivalent inline because of the overhead of the repeated malloc/free */
#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_replace
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Sendrecv_replace(void *buf, int count, MPI_Datatype datatype,
                          int dest, int sendtag,
                          int source, int recvtag,
                          MPI_Comm comm, MPI_Status *status)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Context_id_t context_id_offset;
    MPID_Request *sreq;
    MPID_Request *rreq;
    void *tmpbuf = NULL;
    int tmpbuf_size = 0;
    int tmpbuf_count = 0;
    MPID_Comm *comm_ptr;
    MPIU_CHKLMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE);
231
232
233
234
#ifdef MPID_LOG_ARROWS
    /* The logging macros log sendcount and recvcount */
    int sendcount = count, recvcount = count;
#endif
235
236

    MPIDI_PT2PT_FUNC_ENTER_BOTH(MPID_STATE_MPIC_SENDRECV_REPLACE);
237
    
238
239
240
241
242
243
    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id_offset = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    if (count > 0 && dest != MPI_PROC_NULL)
    {
244
        MPIR_Pack_size_impl(count, datatype, &tmpbuf_size);
245
246
        MPIU_CHKLMEM_MALLOC(tmpbuf, void *, tmpbuf_size, mpi_errno, "temporary send buffer");

247
        mpi_errno = MPIR_Pack_impl(buf, count, datatype, tmpbuf, tmpbuf_size, &tmpbuf_count);
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = MPID_Irecv(buf, count, datatype, source, recvtag,
                           comm_ptr, context_id_offset, &rreq);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    mpi_errno = MPID_Isend(tmpbuf, tmpbuf_count, MPI_PACKED, dest,
                           sendtag, comm_ptr, context_id_offset,
                           &sreq);
    if (mpi_errno != MPI_SUCCESS)
    {
        /* --BEGIN ERROR HANDLING-- */
        /* FIXME: should we cancel the pending (possibly completed) receive request or wait for it to complete? */
        MPID_Request_release(rreq);
        MPIU_ERR_POP(mpi_errno);
        /* --END ERROR HANDLING-- */
    }

267
    if (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq))
268
269
270
271
    {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
272
        while (!MPID_Request_is_complete(sreq) || !MPID_Request_is_complete(rreq))
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
        {
            mpi_errno = MPID_Progress_wait(&progress_state);
            if (mpi_errno != MPI_SUCCESS)
            {
                /* --BEGIN ERROR HANDLING-- */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_POP(mpi_errno);
                /* --END ERROR HANDLING-- */
            }
        }
        MPID_Progress_end(&progress_state);
    }

    if (status != MPI_STATUS_IGNORE) {
        *status = rreq->status;
    }

    if (mpi_errno == MPI_SUCCESS) {
        mpi_errno = rreq->status.MPI_ERROR;

        if (mpi_errno == MPI_SUCCESS) {
            mpi_errno = sreq->status.MPI_ERROR;
        }
    }

    MPID_Request_release(sreq);
    MPID_Request_release(rreq);

    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_PT2PT_FUNC_EXIT_BOTH(MPID_STATE_MPIC_SENDRECV_REPLACE);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
310
311
312
313
314
315
316
317

#undef FUNCNAME
#define FUNCNAME MPIR_Localcopy
#undef FCNAME
#define FCNAME "MPIR_Localcopy"
int MPIR_Localcopy(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                   void *recvbuf, int recvcount, MPI_Datatype recvtype)
{
318
319
320
    int mpi_errno = MPI_SUCCESS;
    int sendtype_iscontig, recvtype_iscontig;
    MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz;
321
    MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb;
322
    MPIU_CHKLMEM_DECL(1);
323
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY);
324

325
326
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY);

327
328
329
    MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig);
    MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig);

330
331
332
333
    MPID_Datatype_get_size_macro(sendtype, sendsize);
    MPID_Datatype_get_size_macro(recvtype, recvsize);
    sdata_sz = sendsize * sendcount;
    rdata_sz = recvsize * recvcount;
334
335
336

    if (!sdata_sz || !rdata_sz)
        goto fn_exit;
337
338
    
    if (sdata_sz > rdata_sz)
339
    {
340
341
342
343
344
345
346
        MPIU_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz);
        copy_sz = rdata_sz;
    }
    else
    {
        copy_sz = sdata_sz;
    }
347

348
349
    MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent);
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent);
350
351

    if (sendtype_iscontig && recvtype_iscontig)
352
353
354
355
356
357
358
359
360
    {
#if defined(HAVE_ERROR_CHECKING)
        MPIU_ERR_CHKMEMCPYANDJUMP(mpi_errno,
                                  ((char *)recvbuf + recvtype_true_lb),
                                  ((char *)sendbuf + sendtype_true_lb),
                                  copy_sz);
#endif
        MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb),
               ((char *) sendbuf + sendtype_true_lb),
361
               copy_sz);
362
    }
363
364
365
    else if (sendtype_iscontig)
    {
        MPID_Segment seg;
366
	MPI_Aint last;
367
368
369
370

	MPID_Segment_init(recvbuf, recvcount, recvtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_unpack(&seg, 0, &last, (char*)sendbuf + sendtype_true_lb);
371
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
372
373
374
375
    }
    else if (recvtype_iscontig)
    {
        MPID_Segment seg;
376
	MPI_Aint last;
377
378
379
380
381
382
383
384
385
386
387
388
389
390

	MPID_Segment_init(sendbuf, sendcount, sendtype, &seg, 0);
	last = copy_sz;
	MPID_Segment_pack(&seg, 0, &last, (char*)recvbuf + recvtype_true_lb);
        MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");
    }
    else
    {
	char * buf;
	MPIDI_msg_sz_t buf_off;
	MPID_Segment sseg;
	MPIDI_msg_sz_t sfirst;
	MPID_Segment rseg;
	MPIDI_msg_sz_t rfirst;
391
392

        MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf");
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441

	MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0);
	MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0);

	sfirst = 0;
	rfirst = 0;
	buf_off = 0;
	
	while (1)
	{
	    MPI_Aint last;
	    char * buf_end;

	    if (copy_sz - sfirst > COPY_BUFFER_SZ - buf_off)
	    {
		last = sfirst + (COPY_BUFFER_SZ - buf_off);
	    }
	    else
	    {
		last = copy_sz;
	    }
	    
	    MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off);
	    MPIU_Assert(last > sfirst);
	    
	    buf_end = buf + buf_off + (last - sfirst);
	    sfirst = last;
	    
	    MPID_Segment_unpack(&rseg, rfirst, &last, buf);
	    MPIU_Assert(last > rfirst);

	    rfirst = last;

	    if (rfirst == copy_sz)
	    {
		/* successful completion */
		break;
	    }

            /* if the send side finished, but the recv side couldn't unpack it, there's a datatype mismatch */
            MPIU_ERR_CHKANDJUMP(sfirst == copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch");        

            /* if not all data was unpacked, copy it to the front of the buffer for next time */
	    buf_off = sfirst - rfirst;
	    if (buf_off > 0)
	    {
		memmove(buf, buf_end - buf_off, buf_off);
	    }
	}
442
443
    }
    
444
    
445
  fn_exit:
446
    MPIU_CHKLMEM_FREEALL();
447
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY);
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPIC_Isend
#undef FCNAME
#define FCNAME "MPIC_Isend"
int MPIC_Isend(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
              MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno, context_id;
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr=NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND);

    MPIDI_PT2PT_FUNC_ENTER_FRONT(MPID_STATE_MPIC_ISEND);

    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Isend(buf, count, datatype, dest, tag, comm_ptr,
                          context_id, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } 

    *request = request_ptr->handle;

 fn_fail:
480
    /* --BEGIN ERROR HANDLING-- */
481
482
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_ISEND);
    return mpi_errno;
483
    /* --END ERROR HANDLING-- */
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
}


#undef FUNCNAME
#define FUNCNAME MPIC_Irecv
#undef FCNAME
#define FCNAME "MPIC_Irecv"
int MPIC_Irecv(void *buf, int count, MPI_Datatype datatype, int
               source, int tag, MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno, context_id;
    MPID_Request *request_ptr=NULL;
    MPID_Comm *comm_ptr = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV);

    MPIDI_PT2PT_FUNC_ENTER_BACK(MPID_STATE_MPIC_IRECV);

    MPID_Comm_get_ptr( comm, comm_ptr );
    context_id = (comm_ptr->comm_kind == MPID_INTRACOMM) ?
        MPID_CONTEXT_INTRA_COLL : MPID_CONTEXT_INTER_COLL;

    mpi_errno = MPID_Irecv(buf, count, datatype, source, tag, comm_ptr,
                          context_id, &request_ptr); 
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

    *request = request_ptr->handle;

 fn_fail:
512
    /* --BEGIN ERROR HANDLING-- */
513
514
    MPIDI_PT2PT_FUNC_EXIT_BACK(MPID_STATE_MPIC_IRECV);
    return mpi_errno;
515
    /* --END ERROR HANDLING-- */
516
517
}

518
519
520
521
/* FIXME: For the brief-global and finer-grain control, we must ensure that
   the global lock is *not* held when this routine is called. (unless we change
   progress_start/end to grab the lock, in which case we must *still* make
   sure that the lock is not held when this routine is called). */
522
523
524
525
526
527
#undef FUNCNAME
#define FUNCNAME MPIC_Wait
#undef FCNAME
#define FCNAME "MPIC_Wait"
int MPIC_Wait(MPID_Request * request_ptr)
{
Rajeev Thakur's avatar
Rajeev Thakur committed
528
    int mpi_errno = MPI_SUCCESS;
529
530
531
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAIT);

    MPIDI_PT2PT_FUNC_ENTER(MPID_STATE_MPIC_WAIT);
532
    if (!MPID_Request_is_complete(request_ptr))
533
534
535
536
    {
	MPID_Progress_state progress_state;
	
	MPID_Progress_start(&progress_state);
537
        while (!MPID_Request_is_complete(request_ptr))
538
539
540
541
542
543
544
545
	{
	    mpi_errno = MPID_Progress_wait(&progress_state);
	    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
	}
	MPID_Progress_end(&progress_state);
    }

 fn_fail:
546
    /* --BEGIN ERROR HANDLING-- */
547
    MPIDI_PT2PT_FUNC_EXIT(MPID_STATE_MPIC_WAIT);
548
    return mpi_errno;
549
    /* --END ERROR HANDLING-- */
550
}
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847


/* Fault-tolerance versions.  When a process fails, collectives will
   still complete, however the result may be invalid.  Processes
   directly communicating with the failed process can detect the
   failure, however another mechanism is needed to commuinicate the
   failure to other processes receiving the invalid data.  To do this
   we introduce the _ft versions of the MPIC_ helper functions.  These
   functions take a pointer to an error flag.  When this is set to
   TRUE, the send functions will communicate the failure to the
   receiver.  If a function detects a failure, either by getting a
   failure in the communication operation, or by receiving an error
   indicator from a remote process, it sets the error flag to TRUE.

   In this implementation, we indicate an error to a remote process by
   sending an empty message instead of the requested buffer.  When a
   process receives an empty message, it knows to set the error flag.
   We count on the fact that collectives that exchange data (as
   opposed to barrier) will never send an empty message.  The barrier
   collective will not communicate failure information this way, but
   this is OK since there is no data that can be received corrupted. */

#undef FUNCNAME
#define FUNCNAME MPIC_Send_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Send_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                 MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

    if (*errflag)
        mpi_errno = MPIC_Send(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
    else
        mpi_errno = MPIC_Send(buf, count, datatype, dest, tag, comm);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SEND_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Recv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Recv_ft(void *buf, int count, MPI_Datatype datatype, int source, int tag,
                 MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Status mystatus;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_RECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_RECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

    if (status == MPI_STATUS_IGNORE)
        status = &mystatus;
    
    mpi_errno = MPIC_Recv(buf, count, datatype, source, MPI_ANY_TAG, comm, status);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (*errflag)
        goto fn_exit;

    if (source != MPI_PROC_NULL) {
        if (status->MPI_TAG == MPIR_ERROR_TAG)
            *errflag = TRUE;
        else {
            MPIU_Assert(status->MPI_TAG == tag);
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_RECV_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Ssend_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Ssend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                  MPI_Comm comm, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SSEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SSEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");
    
    if (*errflag)
        mpi_errno = MPIC_Ssend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm);
    else
        mpi_errno = MPIC_Ssend(buf, count, datatype, dest, tag, comm);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SSEND_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Sendrecv_ft(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                     int dest, int sendtag, void *recvbuf, int recvcount,
                     MPI_Datatype recvtype, int source, int recvtag,
                     MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Status mystatus;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

    if (status == MPI_STATUS_IGNORE)
        status = &mystatus;
    
    if (*errflag) {
        mpi_errno = MPIC_Sendrecv(sendbuf, sendcount, sendtype, dest, MPIR_ERROR_TAG,
                                  recvbuf, recvcount, recvtype, source, MPI_ANY_TAG,
                                  comm, status);
        goto fn_exit;
    }
    
    mpi_errno = MPIC_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag,
                              recvbuf, recvcount, recvtype, source, MPI_ANY_TAG,
                              comm, status);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (source != MPI_PROC_NULL) {
        if (status->MPI_TAG == MPIR_ERROR_TAG)
            *errflag = TRUE;
        else {
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }
    
 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");

    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Sendrecv_replace_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Sendrecv_replace_ft(void *buf, int count, MPI_Datatype datatype,
                             int dest, int sendtag,
                             int source, int recvtag,
                             MPI_Comm comm, MPI_Status *status, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Status mystatus;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

    if (status == MPI_STATUS_IGNORE)
        status = &mystatus;
    
    if (*errflag) {
        mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
                                          dest, MPIR_ERROR_TAG,
                                          source, recvtag,
                                          comm, status);
        goto fn_exit;
    }
    
    mpi_errno = MPIC_Sendrecv_replace(buf, count, datatype,
                                      dest, sendtag,
                                      source, MPI_ANY_TAG,
                                      comm, status);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    
    if (source != MPI_PROC_NULL) {
        if (status->MPI_TAG == MPIR_ERROR_TAG)
            *errflag = TRUE;
        else {
            MPIU_Assert(status->MPI_TAG == recvtag);
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_SENDRECV_REPLACE_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Isend_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Isend_ft(void *buf, int count, MPI_Datatype datatype, int dest, int tag,
                  MPI_Comm comm, MPI_Request *request, int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_ISEND_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_ISEND_FT);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

    if (*errflag)
        mpi_errno = MPIC_Isend(buf, count, datatype, dest, MPIR_ERROR_TAG, comm, request);
    else
        mpi_errno = MPIC_Isend(buf, count, datatype, dest, tag, comm, request);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_ISEND_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME MPIC_Irecv_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Irecv_ft(void *buf, int count, MPI_Datatype datatype, int source,
                  int tag, MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_IRECV_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_IRECV_FT);

    mpi_errno = MPIC_Irecv(buf, count, datatype, source, MPI_ANY_TAG, comm, request);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_IRECV_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


#undef FUNCNAME
#define FUNCNAME MPIC_Waitall_ft
#undef FCNAME
#define FCNAME MPIU_QUOTE(FUNCNAME)
int MPIC_Waitall_ft(int numreq, MPI_Request requests[], MPI_Status statuses[], int *errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_MPIC_WAITALL_FT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIC_WAITALL_FT);

    MPIU_Assert(statuses != MPI_STATUSES_IGNORE);

    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "IN: errflag = %s", *errflag?"TRUE":"FALSE");

    mpi_errno = MPIR_Waitall_impl(numreq, requests, statuses);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (*errflag)
        goto fn_exit;

    for (i = 0; i < numreq; ++i) {
        if (statuses[i].MPI_TAG == MPIR_ERROR_TAG) {
            *errflag = TRUE;
            break;
        }
    }

 fn_exit:
    MPIU_DBG_MSG_S(PT2PT, TYPICAL, "OUT: errflag = %s", *errflag?"TRUE":"FALSE");
    MPIDI_FUNC_EXIT(MPID_STATE_MPIC_WAITALL_FT);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}