Commit 31ed3f71 authored by Steven J. Plimpton's avatar Steven J. Plimpton
Browse files

more changes to insure each triplet IJK computed exactly once

parent c01fb5f7
......@@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 4000 atoms
Time spent = 0.00126314 secs
Time spent = 0.00139618 secs
pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
pair_coeff * * lj/cut 1.0 1.0
......@@ -60,26 +60,26 @@ Neighbor list info ...
bin: standard
Per MPI rank memory allocation (min/avg/max) = 11.47 | 11.47 | 11.47 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1.033 -4.8899813 0 -3.3408686 -4.2298176
5 1.0337853 -4.8928208 0 -3.3425304 -4.2233154
10 1.0358056 -4.8953304 0 -3.3420104 -4.1897183
15 1.0380938 -4.8990457 0 -3.3422942 -4.1310148
20 1.0389566 -4.9014345 0 -3.3433892 -4.0406616
25 1.0358313 -4.8989663 0 -3.3456079 -3.9093019
Loop time of 12.2062 on 1 procs for 25 steps with 4000 atoms
0 1.033 -4.8404387 0 -3.291326 -4.1332095
5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
Loop time of 15.95 on 1 procs for 25 steps with 4000 atoms
Performance: 353.920 tau/day, 2.048 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 270.846 tau/day, 1.567 timesteps/s
100.0% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 12.202 | 12.202 | 12.202 | 0.0 | 99.96
Pair | 15.946 | 15.946 | 15.946 | 0.0 | 99.97
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.0015621 | 0.0015621 | 0.0015621 | 0.0 | 0.01
Output | 0.00020814 | 0.00020814 | 0.00020814 | 0.0 | 0.00
Modify | 0.0019698 | 0.0019698 | 0.0019698 | 0.0 | 0.02
Other | | 0.0007734 | | | 0.01
Comm | 0.0015042 | 0.0015042 | 0.0015042 | 0.0 | 0.01
Output | 0.00013781 | 0.00013781 | 0.00013781 | 0.0 | 0.00
Modify | 0.0017776 | 0.0017776 | 0.0017776 | 0.0 | 0.01
Other | | 0.0006771 | | | 0.00
Nlocal: 4000 ave 4000 max 4000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
......@@ -97,4 +97,4 @@ Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:13
Total wall time: 0:00:16
......@@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
1 by 2 by 2 MPI processor grid
create_atoms 1 box
Created 4000 atoms
Time spent = 0.000785112 secs
Time spent = 0.000900984 secs
pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
pair_coeff * * lj/cut 1.0 1.0
......@@ -60,26 +60,26 @@ Neighbor list info ...
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.532 | 5.532 | 5.532 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1.033 -4.8921547 0 -3.343042 -4.2340557
5 1.0337949 -4.8947881 0 -3.3444835 -4.2271456
10 1.0358286 -4.8973178 0 -3.3439632 -4.1935779
15 1.0381322 -4.9010593 0 -3.3442503 -4.134913
20 1.0390107 -4.9034854 0 -3.3453589 -4.0446162
25 1.0358988 -4.9010506 0 -3.3475908 -3.9133006
Loop time of 3.20632 on 4 procs for 25 steps with 4000 atoms
0 1.033 -4.8404387 0 -3.291326 -4.1332095
5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
Loop time of 4.34636 on 4 procs for 25 steps with 4000 atoms
Performance: 1347.340 tau/day, 7.797 timesteps/s
100.0% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 993.935 tau/day, 5.752 timesteps/s
99.6% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.1207 | 3.1553 | 3.1859 | 1.5 | 98.41
Pair | 3.9977 | 4.1036 | 4.209 | 4.9 | 94.41
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.019466 | 0.05009 | 0.084602 | 12.0 | 1.56
Output | 7.1049e-05 | 8.2076e-05 | 0.00011325 | 0.0 | 0.00
Modify | 0.00056338 | 0.00057292 | 0.00058413 | 0.0 | 0.02
Other | | 0.0003092 | | | 0.01
Comm | 0.13588 | 0.24134 | 0.34722 | 20.4 | 5.55
Output | 0.00013757 | 0.00015104 | 0.00016761 | 0.0 | 0.00
Modify | 0.00087953 | 0.00091547 | 0.00095582 | 0.0 | 0.02
Other | | 0.0003656 | | | 0.01
Nlocal: 1000 ave 1000 max 1000 min
Histogram: 4 0 0 0 0 0 0 0 0 0
......@@ -97,4 +97,4 @@ Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:03
Total wall time: 0:00:04
......@@ -98,9 +98,15 @@ void PairATM::compute(int eflag, int vflag)
numneigh = list->numneigh;
firstneigh = list->firstneigh;
int count1 = 0;
int count2 = 0;
int count3 = 0;
// triple loop over local atoms and neighbors twice
// must compute each IJK triplet interaction exactly once
// by proc that owns the triplet atom with smallest x coord
// special logic to break ties if multiple atoms have same x or y coords
// inner two loops for jj=1,Jnum and kk=jj+1,Jnum insure
// the pair of other 2 non-minimum-x atoms is only considered once
// triplet geometry criteria for calculation:
// each pair distance <= cutoff
// produce of 3 pair distances <= cutoff_triple^3
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
......@@ -112,12 +118,10 @@ void PairATM::compute(int eflag, int vflag)
jnum = numneigh[i];
jnumm1 = jnum - 1;
// for (jj = 0; jj < jnumm1; jj++) {
// replace with this line:
for (jj = 0; jj < jnum; jj++) {
for (jj = 0; jj < jnumm1; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
rij[0] = x[j][0] - xi;
if (rij[0] < 0.0) continue;
rij[1] = x[j][1] - yi;
......@@ -125,40 +129,33 @@ void PairATM::compute(int eflag, int vflag)
rij[2] = x[j][2] - zi;
if (rij[0] == 0.0 and rij[1] == 0.0 and rij[2] < 0.0) continue;
rij2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2];
count1++;
if (rij2 > cutoff_squared) continue;
count2++;
//for (kk = jj+1; kk < jnum; kk++) {
// replace with these two lines:
for (kk = 0; kk < jnum; kk++) {
if (kk == jj) continue;
for (kk = jj+1; kk < jnum; kk++) {
k = jlist[kk];
k &= NEIGHMASK;
rjk[0] = x[k][0] - x[j][0];
if (rjk[0] < 0.0) continue;
rjk[1] = x[k][1] - x[j][1];
if (rjk[0] == 0.0 and rjk[1] < 0.0) continue;
rjk[2] = x[k][2] - x[j][2];
if (rjk[0] == 0.0 and rjk[1] == 0.0 and rjk[2] < 0.0) continue;
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
if (rjk2 > cutoff_squared) continue;
rik[0] = x[k][0] - xi;
if (rik[0] < 0.0) continue;
rik[1] = x[k][1] - yi;
if (rik[0] == 0.0 and rik[1] < 0.0) continue;
rik[2] = x[k][2] - zi;
if (rik[0] == 0.0 and rik[1] == 0.0 and rik[2] < 0.0) continue;
rik2 = rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2];
if (rik2 > cutoff_squared) continue;
rjk[0] = x[k][0] - x[j][0];
rjk[1] = x[k][1] - x[j][1];
rjk[2] = x[k][2] - x[j][2];
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
if (rjk2 > cutoff_squared) continue;
double r6 = rij2*rjk2*rik2;
if (r6 > cutoff_triple_sixth) continue;
nu_local = nu[type[i]][type[j]][type[k]];
if (nu_local == 0.0) continue;
count3++;
interaction_ddd(nu_local,
r6,rij2,rik2,rjk2,rij,rik,rjk,fj,fk,eflag,evdwl);
......@@ -177,15 +174,6 @@ void PairATM::compute(int eflag, int vflag)
}
}
int count = count1;
MPI_Allreduce(&count,&count1,1,MPI_INT,MPI_SUM,world);
count = count2;
MPI_Allreduce(&count,&count2,1,MPI_INT,MPI_SUM,world);
count = count3;
MPI_Allreduce(&count,&count3,1,MPI_INT,MPI_SUM,world);
printf("FORCE %g %d %d %d\n",cutoff_squared,count1,count2,count3);
if (vflag_fdotr) virial_fdotr_compute();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment