Commit 97e59b16 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer
Browse files

remove content of PR #723 from this branch to avoid merge issues

parent 7e8baf8b
......@@ -10,11 +10,9 @@ replicate command :h3
[Syntax:]
replicate nx ny nz {keyword} :pre
replicate nx ny nz :pre
nx,ny,nz = replication factors in each dimension :ulb
optional {keyword} = {bbox} :l
{bbox} = only check atoms in replicas that overlap with a processor's subdomain :ule
nx,ny,nz = replication factors in each dimension :ul
[Examples:]
......@@ -45,12 +43,6 @@ file that crosses a periodic boundary should be between two atoms with
image flags that differ by 1. This will allow the bond to be
unwrapped appropriately.
The optional keyword {bbox} uses a bounding box to only check atoms
in replicas that overlap with a processor's subdomain when assigning
atoms to processors, and thus can result in substantial speedups for
calculations using a large number of processors. It does require
temporarily using more memory.
[Restrictions:]
A 2d simulation cannot be replicated in the z dimension.
......
......@@ -44,7 +44,7 @@ void Replicate::command(int narg, char **arg)
if (domain->box_exist == 0)
error->all(FLERR,"Replicate command before simulation box is defined");
if (narg < 3 || narg > 4) error->all(FLERR,"Illegal replicate command");
if (narg != 3) error->all(FLERR,"Illegal replicate command");
int me = comm->me;
int nprocs = comm->nprocs;
......@@ -58,10 +58,6 @@ void Replicate::command(int narg, char **arg)
int nz = force->inumeric(FLERR,arg[2]);
int nrep = nx*ny*nz;
int bbox_flag = 0;
if (narg == 4)
if (strcmp(arg[3],"bbox") == 0) bbox_flag = 1;
// error and warning checks
if (nx <= 0 || ny <= 0 || nz <= 0)
......@@ -103,37 +99,6 @@ void Replicate::command(int narg, char **arg)
maxmol = maxmol_all;
}
// check image flags maximum extent; only efficient small image flags compared to new system
int _imagelo[3], _imagehi[3];
_imagelo[0] = 0;
_imagelo[1] = 0;
_imagelo[2] = 0;
_imagehi[0] = 0;
_imagehi[1] = 0;
_imagehi[2] = 0;
if (bbox_flag) {
for (i=0; i<atom->nlocal; ++i) {
imageint image = atom->image[i];
int xbox = (image & IMGMASK) - IMGMAX;
int ybox = (image >> IMGBITS & IMGMASK) - IMGMAX;
int zbox = (image >> IMG2BITS) - IMGMAX;
if (xbox < _imagelo[0]) _imagelo[0] = xbox;
if (ybox < _imagelo[1]) _imagelo[1] = ybox;
if (zbox < _imagelo[2]) _imagelo[2] = zbox;
if (xbox > _imagehi[0]) _imagehi[0] = xbox;
if (ybox > _imagehi[1]) _imagehi[1] = ybox;
if (zbox > _imagehi[2]) _imagehi[2] = zbox;
}
MPI_Allreduce(MPI_IN_PLACE, &(_imagelo[0]), 3, MPI_INT, MPI_MIN, world);
MPI_Allreduce(MPI_IN_PLACE, &(_imagehi[0]), 3, MPI_INT, MPI_MAX, world);
}
// unmap existing atoms via image flags
for (i = 0; i < atom->nlocal; i++)
......@@ -315,392 +280,93 @@ void Replicate::command(int narg, char **arg)
double *coord;
int tag_enable = atom->tag_enable;
if (bbox_flag) {
// allgather size of buf on each proc
n = 0;
for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]);
int * size_buf_rnk;
memory->create(size_buf_rnk, nprocs, "replicate:size_buf_rnk");
MPI_Allgather(&n, 1, MPI_INT, size_buf_rnk, 1, MPI_INT, world);
// size of buf_all
int size_buf_all = 0;
MPI_Allreduce(&n, &size_buf_all, 1, MPI_INT, MPI_SUM, world);
if (me == 0 && screen) {
fprintf(screen," bounding box image = (%i %i %i) to (%i %i %i)\n",
_imagelo[0],_imagelo[1],_imagelo[2],_imagehi[0],_imagehi[1],_imagehi[2]);
fprintf(screen," bounding box extra memory = %.2f MB\n",
(double)size_buf_all*sizeof(double)/1024/1024);
for (int iproc = 0; iproc < nprocs; iproc++) {
if (me == iproc) {
n = 0;
for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]);
}
// rnk offsets
int * disp_buf_rnk;
memory->create(disp_buf_rnk, nprocs, "replicate:disp_buf_rnk");
disp_buf_rnk[0] = 0;
for (i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1];
// allgather buf_all
double * buf_all;
memory->create(buf_all, size_buf_all, "replicate:buf_all");
MPI_Allgatherv(buf, n, MPI_DOUBLE, buf_all, size_buf_rnk, disp_buf_rnk, MPI_DOUBLE, world);
// bounding box of original unwrapped system
double _orig_lo[3], _orig_hi[3];
if (triclinic) {
_orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd + _imagelo[1] * old_xy + _imagelo[2] * old_xz;
_orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd + _imagelo[2] * old_yz;
_orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd;
_orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd + (_imagehi[1]+1) * old_xy + (_imagehi[2]+1) * old_xz;
_orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd + (_imagehi[2]+1) * old_yz;
_orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd;
} else {
_orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd;
_orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd;
_orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd;
_orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd;
_orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd;
_orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd;
}
double _lo[3], _hi[3];
int num_replicas_added = 0;
MPI_Bcast(&n,1,MPI_INT,iproc,world);
MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world);
for (ix = 0; ix < nx; ix++) {
for (iy = 0; iy < ny; iy++) {
for (iz = 0; iz < nz; iz++) {
// domain->remap() overwrites coordinates, so always recompute here
if (triclinic) {
_lo[0] = _orig_lo[0] + ix * old_xprd + iy * old_xy + iz * old_xz;
_hi[0] = _orig_hi[0] + ix * old_xprd + iy * old_xy + iz * old_xz;
_lo[1] = _orig_lo[1] + iy * old_yprd + iz * old_yz;
_hi[1] = _orig_hi[1] + iy * old_yprd + iz * old_yz;
_lo[2] = _orig_lo[2] + iz * old_zprd;
_hi[2] = _orig_hi[2] + iz * old_zprd;
} else {
_lo[0] = _orig_lo[0] + ix * old_xprd;
_hi[0] = _orig_hi[0] + ix * old_xprd;
_lo[1] = _orig_lo[1] + iy * old_yprd;
_hi[1] = _orig_hi[1] + iy * old_yprd;
_lo[2] = _orig_lo[2] + iz * old_zprd;
_hi[2] = _orig_hi[2] + iz * old_zprd;
}
// test if bounding box of shifted replica overlaps sub-domain of proc
// if not, then skip testing atoms
int xoverlap = 1;
int yoverlap = 1;
int zoverlap = 1;
if (triclinic) {
double _llo[3];
domain->x2lamda(_lo,_llo);
double _lhi[3];
domain->x2lamda(_hi,_lhi);
if (_llo[0] > (subhi[0] - EPSILON)
|| _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
if (_llo[1] > (subhi[1] - EPSILON)
|| _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
if (_llo[2] > (subhi[2] - EPSILON)
|| _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
} else {
if (_lo[0] > (subhi[0] - EPSILON)
|| _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
if (_lo[1] > (subhi[1] - EPSILON)
|| _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
if (_lo[2] > (subhi[2] - EPSILON)
|| _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
}
int overlap = 0;
if (xoverlap && yoverlap && zoverlap) overlap = 1;
// if no overlap, test if bounding box wrapped back into new system
if (!overlap) {
// wrap back into cell
imageint imagelo = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
domain->remap(&(_lo[0]), imagelo);
int xboxlo = (imagelo & IMGMASK) - IMGMAX;
int yboxlo = (imagelo >> IMGBITS & IMGMASK) - IMGMAX;
int zboxlo = (imagelo >> IMG2BITS) - IMGMAX;
// while loop over one proc's atom list
imageint imagehi = ((imageint) IMGMAX << IMG2BITS) |
m = 0;
while (m < n) {
image = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
domain->remap(&(_hi[0]), imagehi);
int xboxhi = (imagehi & IMGMASK) - IMGMAX;
int yboxhi = (imagehi >> IMGBITS & IMGMASK) - IMGMAX;
int zboxhi = (imagehi >> IMG2BITS) - IMGMAX;
if (triclinic) {
double _llo[3];
_llo[0] = _lo[0]; _llo[1] = _lo[1]; _llo[2] = _lo[2];
domain->x2lamda(_llo,_lo);
double _lhi[3];
_lhi[0] = _hi[0]; _lhi[1] = _hi[1]; _lhi[2] = _hi[2];
domain->x2lamda(_lhi,_hi);
}
// test all fragments for any overlap; ok to include false positives
int _xoverlap1 = 0;
int _xoverlap2 = 0;
if (!xoverlap) {
if (xboxlo < 0) {
_xoverlap1 = 1;
if ( _lo[0] > (subhi[0] - EPSILON) ) _xoverlap1 = 0;
}
if (xboxhi > 0) {
_xoverlap2 = 1;
if ( _hi[0] < (sublo[0] + EPSILON) ) _xoverlap2 = 0;
}
if (_xoverlap1 || _xoverlap2) xoverlap = 1;
}
int _yoverlap1 = 0;
int _yoverlap2 = 0;
if (!yoverlap) {
if (yboxlo < 0) {
_yoverlap1 = 1;
if ( _lo[1] > (subhi[1] - EPSILON) ) _yoverlap1 = 0;
}
if (yboxhi > 0) {
_yoverlap2 = 1;
if ( _hi[1] < (sublo[1] + EPSILON) ) _yoverlap2 = 0;
}
if (_yoverlap1 || _yoverlap2) yoverlap = 1;
}
int _zoverlap1 = 0;
int _zoverlap2 = 0;
if (!zoverlap) {
if (zboxlo < 0) {
_zoverlap1 = 1;
if ( _lo[2] > (subhi[2] - EPSILON) ) _zoverlap1 = 0;
}
if (zboxhi > 0) {
_zoverlap2 = 1;
if ( _hi[2] < (sublo[2] + EPSILON) ) _zoverlap2 = 0;
}
if (_zoverlap1 || _zoverlap2) zoverlap = 1;
if (triclinic == 0) {
x[0] = buf[m+1] + ix*old_xprd;
x[1] = buf[m+2] + iy*old_yprd;
x[2] = buf[m+3] + iz*old_zprd;
} else {
x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz;
x[1] = buf[m+2] + iy*old_yprd + iz*old_yz;
x[2] = buf[m+3] + iz*old_zprd;
}
// does either fragment overlap w/ sub-domain
if (xoverlap && yoverlap && zoverlap) overlap = 1;
}
// while loop over one proc's atom list
if (overlap) {
num_replicas_added++;
m = 0;
while (m < size_buf_all) {
image = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
if (triclinic == 0) {
x[0] = buf_all[m+1] + ix*old_xprd;
x[1] = buf_all[m+2] + iy*old_yprd;
x[2] = buf_all[m+3] + iz*old_zprd;
} else {
x[0] = buf_all[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz;
x[1] = buf_all[m+2] + iy*old_yprd + iz*old_yz;
x[2] = buf_all[m+3] + iz*old_zprd;
}
domain->remap(x,image);
if (triclinic) {
domain->x2lamda(x,lamda);
coord = lamda;
} else coord = x;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
coord[1] >= sublo[1] && coord[1] < subhi[1] &&
coord[2] >= sublo[2] && coord[2] < subhi[2]) {
m += avec->unpack_restart(&buf_all[m]);
i = atom->nlocal - 1;
if (tag_enable)
atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag;
else atom_offset = 0;
mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol;
atom->x[i][0] = x[0];
atom->x[i][1] = x[1];
atom->x[i][2] = x[2];
atom->tag[i] += atom_offset;
atom->image[i] = image;
if (atom->molecular) {
if (atom->molecule[i] > 0)
atom->molecule[i] += mol_offset;
if (atom->molecular == 1) {
if (atom->avec->bonds_allow)
for (j = 0; j < atom->num_bond[i]; j++)
atom->bond_atom[i][j] += atom_offset;
if (atom->avec->angles_allow)
for (j = 0; j < atom->num_angle[i]; j++) {
atom->angle_atom1[i][j] += atom_offset;
atom->angle_atom2[i][j] += atom_offset;
atom->angle_atom3[i][j] += atom_offset;
}
if (atom->avec->dihedrals_allow)
for (j = 0; j < atom->num_dihedral[i]; j++) {
atom->dihedral_atom1[i][j] += atom_offset;
atom->dihedral_atom2[i][j] += atom_offset;
atom->dihedral_atom3[i][j] += atom_offset;
atom->dihedral_atom4[i][j] += atom_offset;
}
if (atom->avec->impropers_allow)
for (j = 0; j < atom->num_improper[i]; j++) {
atom->improper_atom1[i][j] += atom_offset;
atom->improper_atom2[i][j] += atom_offset;
atom->improper_atom3[i][j] += atom_offset;
atom->improper_atom4[i][j] += atom_offset;
}
}
domain->remap(x,image);
if (triclinic) {
domain->x2lamda(x,lamda);
coord = lamda;
} else coord = x;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
coord[1] >= sublo[1] && coord[1] < subhi[1] &&
coord[2] >= sublo[2] && coord[2] < subhi[2]) {
m += avec->unpack_restart(&buf[m]);
i = atom->nlocal - 1;
if (tag_enable)
atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag;
else atom_offset = 0;
mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol;
atom->x[i][0] = x[0];
atom->x[i][1] = x[1];
atom->x[i][2] = x[2];
atom->tag[i] += atom_offset;
atom->image[i] = image;
if (atom->molecular) {
if (atom->molecule[i] > 0)
atom->molecule[i] += mol_offset;
if (atom->molecular == 1) {
if (atom->avec->bonds_allow)
for (j = 0; j < atom->num_bond[i]; j++)
atom->bond_atom[i][j] += atom_offset;
if (atom->avec->angles_allow)
for (j = 0; j < atom->num_angle[i]; j++) {
atom->angle_atom1[i][j] += atom_offset;
atom->angle_atom2[i][j] += atom_offset;
atom->angle_atom3[i][j] += atom_offset;
}
if (atom->avec->dihedrals_allow)
for (j = 0; j < atom->num_dihedral[i]; j++) {
atom->dihedral_atom1[i][j] += atom_offset;
atom->dihedral_atom2[i][j] += atom_offset;
atom->dihedral_atom3[i][j] += atom_offset;
atom->dihedral_atom4[i][j] += atom_offset;
}
if (atom->avec->impropers_allow)
for (j = 0; j < atom->num_improper[i]; j++) {
atom->improper_atom1[i][j] += atom_offset;
atom->improper_atom2[i][j] += atom_offset;
atom->improper_atom3[i][j] += atom_offset;
atom->improper_atom4[i][j] += atom_offset;
}
}
} else m += static_cast<int> (buf_all[m]);
}
} // if (overlap)
}
}
}
memory->destroy(size_buf_rnk);
memory->destroy(disp_buf_rnk);
memory->destroy(buf_all);
int sum = 0;
MPI_Reduce(&num_replicas_added, &sum, 1, MPI_INT, MPI_SUM, 0, world);
double avg = (double) sum / nprocs;
if (me == 0 && screen)
fprintf(screen," average # of replicas added to proc = %.2f out of %i (%.2f %%)\n",
avg,nx*ny*nz,avg/(nx*ny*nz)*100.0);
} else {
for (int iproc = 0; iproc < nprocs; iproc++) {
if (me == iproc) {
n = 0;
for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]);
}
MPI_Bcast(&n,1,MPI_INT,iproc,world);
MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world);
for (ix = 0; ix < nx; ix++) {
for (iy = 0; iy < ny; iy++) {
for (iz = 0; iz < nz; iz++) {
// while loop over one proc's atom list
m = 0;
while (m < n) {
image = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
if (triclinic == 0) {
x[0] = buf[m+1] + ix*old_xprd;
x[1] = buf[m+2] + iy*old_yprd;
x[2] = buf[m+3] + iz*old_zprd;
} else {
x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz;
x[1] = buf[m+2] + iy*old_yprd + iz*old_yz;
x[2] = buf[m+3] + iz*old_zprd;
}
domain->remap(x,image);
if (triclinic) {
domain->x2lamda(x,lamda);
coord = lamda;
} else coord = x;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
coord[1] >= sublo[1] && coord[1] < subhi[1] &&
coord[2] >= sublo[2] && coord[2] < subhi[2]) {
m += avec->unpack_restart(&buf[m]);
i = atom->nlocal - 1;
if (tag_enable)
atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag;
else atom_offset = 0;
mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol;
atom->x[i][0] = x[0];
atom->x[i][1] = x[1];
atom->x[i][2] = x[2];
atom->tag[i] += atom_offset;
atom->image[i] = image;
if (atom->molecular) {
if (atom->molecule[i] > 0)
atom->molecule[i] += mol_offset;
if (atom->molecular == 1) {
if (atom->avec->bonds_allow)
for (j = 0; j < atom->num_bond[i]; j++)
atom->bond_atom[i][j] += atom_offset;
if (atom->avec->angles_allow)
for (j = 0; j < atom->num_angle[i]; j++) {
atom->angle_atom1[i][j] += atom_offset;
atom->angle_atom2[i][j] += atom_offset;
atom->angle_atom3[i][j] += atom_offset;
}
if (atom->avec->dihedrals_allow)
for (j = 0; j < atom->num_dihedral[i]; j++) {
atom->dihedral_atom1[i][j] += atom_offset;
atom->dihedral_atom2[i][j] += atom_offset;
atom->dihedral_atom3[i][j] += atom_offset;
atom->dihedral_atom4[i][j] += atom_offset;
}
if (atom->avec->impropers_allow)
for (j = 0; j < atom->num_improper[i]; j++) {
atom->improper_atom1[i][j] += atom_offset;
atom->improper_atom2[i][j] += atom_offset;
atom->improper_atom3[i][j] += atom_offset;
atom->improper_atom4[i][j] += atom_offset;
}
}
}
} else m += static_cast<int> (buf[m]);
}
} else m += static_cast<int> (buf[m]);
}
}
}
}
} // if (bbox_flag)
}
// free communication buffer and old atom class
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment