Commit d88056ae authored by Huihuo Zheng
Browse files

Fixed MPI_Win error at large scale

parent a2bbe520
......@@ -63,6 +63,7 @@ void *H5Dread_pthread_func(void *args) {
}
}
MPI_Win_fence(MPI_MODE_NOSUCCEED, H5DMM.win);
if (io_node()==H5DMM.rank and debug_level()>2) cout << "PTHREAD DONE" << endl;
H5DMM.num_request--;
} else {
pthread_cond_signal(&H5DMM.master_cond);
......@@ -137,7 +138,7 @@ void create_mmap(char *path, H5Dio_mmap &f) {
close(fh);
f.fd = open(f.filename, O_RDWR);
f.buf = mmap(NULL, ss, PROT_READ | PROT_WRITE, MAP_SHARED, f.fd, 0);
msync(f.buf, ss, MS_SYNC);
//msync(f.buf, ss, MS_SYNC);
} else {
if (io_node()==f.rank and debug_level()>1)
cout << " Allocate buffer in the memory and attached it to a MPI_Win" << endl;
......@@ -229,7 +230,8 @@ herr_t H5Dread_to_cache(hid_t dataset_id, hid_t mem_type_id,
hid_t mem_space_id, hid_t file_space_id,
hid_t xfer_plist_id, void * dat) {
herr_t err = H5Dread(dataset_id, mem_type_id, mem_space_id, file_space_id, xfer_plist_id, dat);
if (io_node()==H5DMM.rank and debug_level()>1)
cout << " H5Dread from file system done.." << endl;
hsize_t bytes = get_buf_size(mem_space_id, mem_type_id);
double t0 = MPI_Wtime();
H5PthreadWait();// notice that the first batch it will not wait
......@@ -260,10 +262,13 @@ herr_t H5Dread_to_cache(hid_t dataset_id, hid_t mem_type_id,
herr_t H5Dread_from_cache(hid_t dataset_id, hid_t mem_type_id,
hid_t mem_space_id, hid_t file_space_id,
hid_t xfer_plist_id, void * dat) {
if (io_node()==H5DMM.rank and debug_level()>1) {
cout << "Reading data from cache (H5Dread_from_cache)" << endl;
}
bool contig = false;
vector<int> b;
get_samples_from_filespace(file_space_id, b, contig);
H5PthreadWait();
get_samples_from_filespace(file_space_id, b, contig);
MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, H5DMM.win);
char *p_mmap = (char *) H5DMM.buf;
char *p_mem = (char *) dat;
......@@ -309,8 +314,8 @@ herr_t H5DMMF_remap() {
munmap(H5DMM.buf, ss);
close(H5DMM.fd);
H5DMM.fd = open(H5DMM.filename, O_RDWR);
H5DMM.buf = mmap(NULL, ss, PROT_READ | PROT_WRITE, MAP_SHARED, H5DMM.fd, 0);
msync(H5DMM.buf, ss, MS_SYNC);
H5DMM.buf = mmap(NULL, ss, PROT_READ, MAP_SHARED, H5DMM.fd, 0);
//msync(H5DMM.buf, ss, MS_SYNC);
}
return 0;
}
......
#Makefile
CXX=mpicxx -g
CXX=mpicxx -g -O3
#HDF5_ROOT=/blues/gpfs/software/centos7/spack/opt/spack/linux-centos7-x86_64/gcc-8.2.0/hdf5-1.10.5-vozfsah/
CFLAGS=-I$(HDF5_ROOT)/include -O3 -I../utils
HDF5_LIB=-L$(HDF5_ROOT)/lib -lhdf5
......
......@@ -76,6 +76,7 @@ int main(int argc, char **argv) {
int batch_size = 32;
int rank_shift = 0;
int num_images = 1;
bool barrier = false; // set this always to be false. this is just for debug purpose
bool remap = false;
int i=0;
Timing tt(io_node()==rank);
......@@ -107,6 +108,8 @@ int main(int argc, char **argv) {
strcpy(local_storage, argv[i+1]); i+=2;
} else if (strcmp(argv[i], "--compute")==0) {
compute = atof(argv[i+1]); i+=2;
} else if (strcmp(argv[i], "--barrier")==0) {
barrier = true; i = i+1;
} else {
i=i+1;
}
......@@ -214,11 +217,14 @@ int main(int argc, char **argv) {
num_batches*batch_size*dim*sizeof(float)/tt["H5Dread"].t/1024/1024*nproc);
// Epochs 1 - ... reading data directly from local storage
MPI_Barrier(MPI_COMM_WORLD);
if (barrier)
MPI_Barrier(MPI_COMM_WORLD);
for(int e =1; e < epochs; e++) {
if (remap)
if (remap) {
tt.start_clock("REMAP");
H5DMMF_remap();
tt.stop_clock("REMAP");
}
if (shuffle) ::shuffle(id.begin(), id.end(), g);
parallel_dist(num_images, nproc, (rank+e*rank_shift)%nproc, ns_loc, fs_loc);
double t1 = 0.0;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment