Commit 6523ad97 authored by Sangmin Seo
Browse files

Fix async progress problem in NBC I/O.



A deadlock occurred when the async progress thread blocked the progress
engine and yielded control while another thread was waiting inside a
wait routine of the NBC I/O implementation, e.g., ADIOI_GEN_iwc_wait_fn.
The waiting thread repeatedly called MPI_Test to make progress, but the
progress engine could not make progress because it was blocked by the
async progress thread.  The async progress thread, in turn, tried to
acquire the lock, but the waiting thread never released it because it
had not finished the wait routine.  This patch fixes the deadlock by
forcing the waiting thread to yield if the progress engine has been
blocked by another thread.

Fixes #2202
Signed-off-by: Rob Latham <robl@mcs.anl.gov>
parent 8a9d5c71
......@@ -49,6 +49,25 @@ void MPIR_Ext_cs_exit_allfunc(void)
MPIU_THREAD_CS_EXIT(ALLFUNC,);
}
/* Yield hook for ROMIO: a thread that is waiting inside a ROMIO routine for
 * communication to complete calls this so that it yields the ALLFUNC critical
 * section, but only while the progress engine is blocked by another thread.
 * In builds without MPICH_IS_THREADED the function body is empty. */
#ifdef MPICH_IS_THREADED
/* Flag defined elsewhere; compared against TRUE below. */
extern volatile int MPIDI_CH3I_progress_blocked;
#endif

void MPIR_Ext_cs_yield_allfunc_if_progress_blocked(void)
{
#ifdef MPICH_IS_THREADED
    MPIU_THREAD_CHECK_BEGIN;
    if (MPIDI_CH3I_progress_blocked == TRUE) {
        /* The progress engine is blocked by another thread: yield so that
         * thread gets a chance to run. */
        MPIU_THREAD_CS_YIELD(ALLFUNC,);
    }
    MPIU_THREAD_CHECK_END;
#endif
}
/* will consider MPI_DATATYPE_NULL to be an error */
#undef FUNCNAME
#define FUNCNAME MPIR_Ext_datatype_iscommitted
......
......@@ -53,6 +53,7 @@ int MPIR_Ext_init(void);
/* ALLFUNC critical-section hooks.  The exit hook wraps
 * MPIU_THREAD_CS_EXIT(ALLFUNC,); the enter hook is presumably the matching
 * MPIU_THREAD_CS_ENTER wrapper (its body is not visible here -- confirm). */
void MPIR_Ext_cs_enter_allfunc(void);
void MPIR_Ext_cs_exit_allfunc(void);
/* Yields the ALLFUNC critical section only when MPIDI_CH3I_progress_blocked
 * is TRUE; does nothing in builds without MPICH_IS_THREADED. */
void MPIR_Ext_cs_yield_allfunc_if_progress_blocked(void);
/* to facilitate error checking */
int MPIR_Ext_datatype_iscommitted(MPI_Datatype datatype);
......
......@@ -7,6 +7,7 @@
#include "adio.h"
#include "adio_extern.h"
#include "mpiu_greq.h"
#include "mpioimpl.h"
#ifdef USE_DBG_LOGGING
#define RDCOLL_DEBUG 1
......@@ -1303,6 +1304,10 @@ static int ADIOI_GEN_irc_wait_fn(int count, void **array_of_states,
if ((timeout > 0) && (timeout < (MPI_Wtime() - starttime)))
goto fn_exit;
/* If the progress engine is blocked, we have to yield for another
thread to be able to unblock the progress engine. */
MPIU_THREAD_CS_YIELD(ALLFUNC,_if_progress_blocked);
}
}
......
......@@ -7,6 +7,7 @@
#include "adio.h"
#include "adio_extern.h"
#include "mpiu_greq.h"
#include "mpioimpl.h"
#ifdef AGGREGATION_PROFILE
#include "mpe.h"
......@@ -1527,6 +1528,10 @@ static int ADIOI_GEN_iwc_wait_fn(int count, void **array_of_states,
if ((timeout > 0) && (timeout < (MPI_Wtime() - starttime)))
goto fn_exit;
/* If the progress engine is blocked, we have to yield for another
thread to be able to unblock the progress engine. */
MPIU_THREAD_CS_YIELD(ALLFUNC,_if_progress_blocked);
}
}
......
......@@ -20,8 +20,10 @@
/* Generic critical-section macros: the section name (name_) is pasted onto
 * the macro name to select the per-section implementation defined below, and
 * ctx_ is passed through to it unchanged. */
#define MPIU_THREAD_CS_ENTER(name_,ctx_) MPIU_THREAD_CS_ENTER_##name_(ctx_)
#define MPIU_THREAD_CS_EXIT(name_,ctx_) MPIU_THREAD_CS_EXIT_##name_(ctx_)
#define MPIU_THREAD_CS_YIELD(name_,ctx_) MPIU_THREAD_CS_YIELD_##name_(ctx_)
/* ALLFUNC implementations forward to the MPIR_Ext_* functions.  ENTER and
 * EXIT ignore ctx_.  YIELD instead pastes ctx_ onto the function name, so
 * ctx_ acts as a name suffix: MPIU_THREAD_CS_YIELD(ALLFUNC,_if_progress_blocked)
 * expands to MPIR_Ext_cs_yield_allfunc_if_progress_blocked(), while an empty
 * ctx_ expands to MPIR_Ext_cs_yield_allfunc().
 * NOTE(review): no prototype for a plain MPIR_Ext_cs_yield_allfunc() is
 * visible in this excerpt -- confirm it exists before using an empty ctx_. */
#define MPIU_THREAD_CS_ENTER_ALLFUNC(ctx_) MPIR_Ext_cs_enter_allfunc()
#define MPIU_THREAD_CS_EXIT_ALLFUNC(ctx_) MPIR_Ext_cs_exit_allfunc()
#define MPIU_THREAD_CS_YIELD_ALLFUNC(ctx_) MPIR_Ext_cs_yield_allfunc##ctx_()
/* committed datatype checking support in ROMIO */
#define MPIO_DATATYPE_ISCOMMITTED(dtype_, err_) \
......@@ -36,6 +38,7 @@
of correct programs */
/* Fallback definitions: the critical-section macros expand to nothing, so
 * ENTER/EXIT/YIELD call sites compile away in this configuration. */
#define MPIU_THREAD_CS_ENTER(x,y)
#define MPIU_THREAD_CS_EXIT(x,y)
#define MPIU_THREAD_CS_YIELD(x,y)
/* The committed-datatype check is likewise reduced to an empty statement. */
#define MPIO_DATATYPE_ISCOMMITTED(dtype_, err_) do {} while (0)
#ifdef HAVE_WINDOWS_H
#define MPIU_UNREFERENCED_ARG(a) a
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment