GitLab maintenance scheduled for Today, 2019-04-24, from 12:00 to 13:00 CDT - Services will be unavailable during this time.

Commit 6a1d647a authored by Francois Tessier

First attempt to fix the Fence bug on XC40. Need to try out on Theta.

parent 28930113
......@@ -192,12 +192,19 @@ int Tapioca::Write (MPI_File fileHandle, MPI_Offset offset, void *buf,
* Wait if it's not the appropriate round
*/
while ( this->roundsIds[targetRoundIdx] > this->currentRound_ ) {
/* TODO: Need to add a condition. What if only one aggr is necessary during the last round ?*/
for ( i = 0; i < this->nAggr_ ; i++ ) {
fprintf (stdout, "[INFO] %d calls 1st GlobalFence, buff %d, dat %s (r: %d, f: %d)\n", this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
this->GlobalFence (var);
fprintf (stdout, "[INFO] %d passes 1st GlobalFence, buff %d, dat %s (r: %d, f: %d)\n", this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
this->currentFence_++;
this->roundCounter_++;
#ifdef XC40
if ( this->totalNeededBuffers_ != this->countNeededBuffers_ ) {
#endif
fprintf (stdout, "[INFO] %d calls 1st GlobalFence, buff %d, dat %s (r: %d, f: %d)\n", this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
this->GlobalFence (var);
fprintf (stdout, "[INFO] %d passes 1st GlobalFence, buff %d, dat %s (r: %d, f: %d)\n", this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
this->currentFence_++;
this->countNeededBuffers_++;
#ifdef XC40
}
#endif
}
if ( this->amAnAggr_ ) {
......@@ -242,13 +249,20 @@ int Tapioca::Write (MPI_File fileHandle, MPI_Offset offset, void *buf,
* If all the data have been written, wait
*/
if ( this->currentDataSize_ == this->rankDataSize_) {
while ( this->roundCounter_ < this->totalRounds_) {
fprintf (stdout, "[INFO] %d calls 2nd GlobalFence, buff %d, dat %s (r: %d, f: %d)\n", this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
while ( this->countNeededBuffers_ < this->totalNeededBuffers_) {
fprintf (stdout, "[INFO] %d calls 2nd GlobalFence, buff %d, dat %s (r: %d, f: %d)\n",
this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
this->GlobalFence (var);
fprintf (stdout, "[INFO] %d passes 2nd GlobalFence, buff %d, dat %s (r: %d, f: %d)\n", this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
this->currentFence_++;
if ( (this->roundCounter_ % this->nAggr_) == 0 ) {
fprintf (stdout, "[INFO] %d passes 2nd GlobalFence, buff %d, dat %s (r: %d, f: %d)\n",
this->commRank_, (this->currentRound_ % NBUFFERS), var, this->currentRound_, this->currentFence_);
this->currentFence_++;
#ifdef XC40
this->countNeededBuffers_++;
#endif
if ( (this->countNeededBuffers_ % this->nAggr_) == 0 ) {
if ( this->amAnAggr_ ) {
// if (iRequest != NULL)
// MPI_Wait ( &iRequest, &iStatus );
......@@ -256,13 +270,12 @@ int Tapioca::Write (MPI_File fileHandle, MPI_Offset offset, void *buf,
this->Push (fileHandle, status);
//this->iPush (fileHandle, &iRequest);
}
#ifdef BGQ
fprintf (stdout, "[INFO] %d increment 2nd currentRound (r: %d, rc: %d, nag: %d)\n", this->commRank_, this->currentRound_, this->roundCounter_, this->nAggr_);
fprintf (stdout, "[INFO] %d increment 2nd currentRound (r: %d, rc: %d, nag: %d)\n", this->commRank_, this->currentRound_, this->countNeededBuffers_, this->nAggr_);
this->currentRound_++;
#endif
}
this->roundCounter_++;
#ifdef BGQ
this->countNeededBuffers_++;
#endif
}
}
......@@ -417,7 +430,7 @@ void Tapioca::SetDefaultValues ()
this->amAnAggr_ = false;
this->commSplit_ = true;
this->currentRound_ = 0;
this->roundCounter_ = 0;
this->countNeededBuffers_ = 0;
this->currentDataSize_ = 0;
this->nCommit_ = 0;
this->writeDevNull_ = false;
......@@ -519,9 +532,9 @@ void Tapioca::IdentifyMyAggregators ()
int64_t remainingData, offsetInAggrData;
std::vector<Round_t> rounds;
this->totalRounds_ = ceil ( (double)this->commDataSize_ / (double)this->bufferSize_ );
this->totalNeededBuffers_ = ceil ( (double)this->commDataSize_ / (double)this->bufferSize_ );
for ( i = 0; i < this->totalRounds_; i++ ) {
for ( i = 0; i < this->totalNeededBuffers_; i++ ) {
Round_t r;
r.aggr = i % this->nAggr_;
r.round = i / this->nAggr_;
......@@ -557,10 +570,10 @@ void Tapioca::IdentifyMyAggregators ()
#ifdef DEBUG
if (this->commRank_ == 4) {
fprintf (stdout, "[DEBUG] Rounds distrib. on %d aggregators: AGG ", this->nAggr_);
for ( i = 0; i < this->totalRounds_; i++ )
for ( i = 0; i < this->totalNeededBuffers_; i++ )
fprintf (stdout, "%d ", rounds[i].aggr);
fprintf (stdout, "\n RND ");
for ( i = 0; i < this->totalRounds_; i++ )
for ( i = 0; i < this->totalNeededBuffers_; i++ )
fprintf (stdout, "%d ", rounds[i].round);
fprintf (stdout, "\n AGG ");
for ( i = 0; i < this->globalAggregatorsRanks.size(); i++ )
......
......@@ -135,8 +135,8 @@ class Tapioca
int nAggr_;
int currentRound_;
int totalRounds_;
int roundCounter_;
int totalNeededBuffers_;
int countNeededBuffers_;
int64_t currentDataSize_;
int intCoords_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment