Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2e451b9d
Commit
2e451b9d
authored
Dec 13, 2012
by
Peter Eastman
Browse files
Deleted the old CUDA platform
parent
352e2fc7
Changes
147
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
0 additions
and
2268 deletions
+0
-2268
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF1.h
...rc/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF1.h
+0
-182
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h
...c/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h
+0
-465
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT1.h
...rc/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT1.h
+0
-170
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT2.h
...rc/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT2.h
+0
-181
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...uda-old/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+0
-419
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...cuda-old/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+0
-275
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+0
-576
No files found.
Too many changes to show.
To preserve performance only
147 of 147+
files are displayed.
Plain diff
Email patch
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF1.h
deleted
100644 → 0
View file @
352e2fc7
/**
 * Permanent-multipole pair force and energy (PME direct-space, part F1).
 *
 * Reads the charge, lab-frame dipole and lab-frame quadrupole of both
 * particles and evaluates their pairwise interaction for the separation
 * stored in delta.
 *
 * @param atomI          first particle; only q, labFrameDipole and
 *                       labFrameQuadrupole are read
 * @param atomJ          second particle; same fields are read
 * @param delta          x/y/z are the separation components; w is read as rr1
 *                       when APPLY_SCALE is defined (presumably 1/r -- confirm
 *                       against the caller)
 * @param bn             x, y, z, w are used as the coefficients bn1..bn4
 *                       (presumably the Ewald-damped radial series -- confirm)
 * @param bn5            coefficient that follows bn4 in the same series
 * @param forceFactor    multiplier applied to the energy contribution only
 * @param scalingFactors (APPLY_SCALE builds only) table indexed with MScaleIndex
 * @param force          output: force[0..2] are OVERWRITTEN with the pair force
 * @param energy         in/out: the pair energy is added to *energy
 */
static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF1, _kernel )(
        PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ,
        float4 delta, float4 bn, float bn5, float forceFactor,
#ifdef APPLY_SCALE
        const float* scalingFactors,
#endif
        float force[3], float* energy ){

    // Separation vector.
    const float xr = delta.x;
    const float yr = delta.y;
    const float zr = delta.z;

    // Permanent multipoles of atom I. The quadrupole is stored as five
    // entries (indices 0..4 map to qi1, qi2, qi3, qi5, qi6); the last diagonal
    // entry is rebuilt as -(Q[0] + Q[3]), which makes the tensor traceless.
    const float ci  = atomI.q;
    const float di1 = atomI.labFrameDipole[0];
    const float di2 = atomI.labFrameDipole[1];
    const float di3 = atomI.labFrameDipole[2];
    const float qi1 = atomI.labFrameQuadrupole[0];
    const float qi2 = atomI.labFrameQuadrupole[1];
    const float qi3 = atomI.labFrameQuadrupole[2];
    const float qi5 = atomI.labFrameQuadrupole[3];
    const float qi6 = atomI.labFrameQuadrupole[4];
    const float qi9 = -(atomI.labFrameQuadrupole[0] + atomI.labFrameQuadrupole[3]);

    // Permanent multipoles of atom J, same layout.
    const float ck  = atomJ.q;
    const float dk1 = atomJ.labFrameDipole[0];
    const float dk2 = atomJ.labFrameDipole[1];
    const float dk3 = atomJ.labFrameDipole[2];
    const float qk1 = atomJ.labFrameQuadrupole[0];
    const float qk2 = atomJ.labFrameQuadrupole[1];
    const float qk3 = atomJ.labFrameQuadrupole[2];
    const float qk5 = atomJ.labFrameQuadrupole[3];
    const float qk6 = atomJ.labFrameQuadrupole[4];
    const float qk9 = -(atomJ.labFrameQuadrupole[0] + atomJ.labFrameQuadrupole[3]);

    const float bn1 = bn.x;
    const float bn2 = bn.y;
    const float bn3 = bn.z;
    const float bn4 = bn.w;

#ifdef APPLY_SCALE
    // offset = 1 - mscale: weight of the rr-power terms that are subtracted
    // from the bn coefficients below.
    const float rr1    = delta.w;
    const float offset = 1.0f - scalingFactors[MScaleIndex];
    const float rr3    = rr1*rr1*rr1;
    const float rr5    = 3.0f*rr3*rr1*rr1;
    const float rr7    = 15.0f*rr3*rr3*rr1;
    const float rr9    = 105.0f*rr3*rr3*rr3;
    const float rr11   = 945.0f*rr3*rr3*rr3*rr1*rr1;
    const float gf4    = 2.0f*(bn2 - 3.0f*offset*rr3*rr1*rr1);
    const float gf7    = 4.0f*(bn3 - 15.0f*offset*rr3*rr3*rr1);
#else
    const float gf4    = 2.0f*bn2;
    const float gf7    = 4.0f*bn3;
#endif

    // Quadrupole(I).dipole(J) and quadrupole(J).dipole(I) vectors.
    const float qidk1 = qi1*dk1 + qi2*dk2 + qi3*dk3;
    const float qkdi1 = qk1*di1 + qk2*di2 + qk3*di3;
    const float qidk2 = qi2*dk1 + qi5*dk2 + qi6*dk3;
    const float qkdi2 = qk2*di1 + qk5*di2 + qk6*di3;
    const float qidk3 = qi3*dk1 + qi6*dk2 + qi9*dk3;
    const float qkdi3 = qk3*di1 + qk6*di2 + qk9*di3;

    float fx = gf4*(qkdi1 - qidk1);
    float fy = gf4*(qkdi2 - qidk2);
    float fz = gf4*(qkdi3 - qidk3);

    // Quadrupole tensors applied to the separation vector.
    const float qir1 = qi1*xr + qi2*yr + qi3*zr;
    const float qir2 = qi2*xr + qi5*yr + qi6*zr;
    const float qir3 = qi3*xr + qi6*yr + qi9*zr;
    const float qkr1 = qk1*xr + qk2*yr + qk3*zr;
    const float qkr2 = qk2*xr + qk5*yr + qk6*zr;
    const float qkr3 = qk3*xr + qk6*yr + qk9*zr;

    const float qiqkr1 = qi1*qkr1 + qi2*qkr2 + qi3*qkr3;
    const float qkqir1 = qk1*qir1 + qk2*qir2 + qk3*qir3;
    const float qiqkr2 = qi2*qkr1 + qi5*qkr2 + qi6*qkr3;
    const float qkqir2 = qk2*qir1 + qk5*qir2 + qk6*qir3;
    const float qiqkr3 = qi3*qkr1 + qi6*qkr2 + qi9*qkr3;
    const float qkqir3 = qk3*qir1 + qk6*qir2 + qk9*qir3;

    fx += gf7*(qiqkr1 + qkqir1);
    fy += gf7*(qiqkr2 + qkqir2);
    fz += gf7*(qiqkr3 + qkqir3);

    // Scalar products of the permanent components.
    const float gl6 = di1*dk1 + di2*dk2 + di3*dk3;
    const float gl7 = 2.0f*(qir1*dk1 + qir2*dk2 + qir3*dk3 - (qkr1*di1 + qkr2*di2 + qkr3*di3));
    const float gl5 = -4.0f*(qir1*qkr1 + qir2*qkr2 + qir3*qkr3);
    const float gl8 = 2.0f*(qi1*qk1 + qi2*qk2 + qi3*qk3 + qi2*qk2 + qi5*qk5 + qi6*qk6 + qi3*qk3 + qi6*qk6 + qi9*qk9);

    const float sc3 = di1*xr  + di2*yr  + di3*zr;
    const float sc5 = qir1*xr + qir2*yr + qir3*zr;
    const float sc4 = dk1*xr  + dk2*yr  + dk3*zr;
    const float sc6 = qkr1*xr + qkr2*yr + qkr3*zr;

    const float gl0 = ci*ck;
    const float gl1 = ck*sc3 - ci*sc4;
    const float gl2 = ci*sc6 + ck*sc5 - sc3*sc4;
    const float gl3 = sc3*sc6 - sc4*sc5;
    const float gl4 = sc5*sc6;

    // Sums that multiply the same radial coefficient in energy and force.
    const float gl16  = gl1 + gl6;
    const float gl278 = gl2 + gl7 + gl8;
    const float gl35  = gl3 + gl5;

    // Energy. Note that no bn-weighted gl0 (charge-charge) term is added here;
    // only the scaled build contributes -offset*rr1*gl0.
#ifdef APPLY_SCALE
    *energy += forceFactor*(-offset*rr1*gl0 + (bn1 - offset*rr3)*gl16 + (bn2 - offset*rr5)*gl278 + (bn3 - offset*rr7)*gl35 + (bn4 - offset*rr9)*gl4);
#else
    *energy += forceFactor*(bn1*gl16 + bn2*gl278 + bn3*gl35 + bn4*gl4);
#endif

    // Component along the separation vector.
    float gf1 = bn1*gl0 + bn2*gl16 + bn3*gl278 + bn4*gl35 + bn5*gl4;
#ifdef APPLY_SCALE
    gf1 -= offset*(rr3*gl0 + rr5*gl16 + rr7*gl278 + rr9*gl35 + rr11*gl4);
#endif
    fx += gf1*xr;
    fy += gf1*yr;
    fz += gf1*zr;

    // Component along dipole I.
#ifdef APPLY_SCALE
    const float gf2 = -ck*bn1 + sc4*bn2 - sc6*bn3 - offset*(-ck*rr3 + sc4*rr5 - sc6*rr7);
#else
    const float gf2 = -ck*bn1 + sc4*bn2 - sc6*bn3;
#endif
    fx += gf2*di1;
    fy += gf2*di2;
    fz += gf2*di3;

    // Component along Q(I).r.
#ifdef APPLY_SCALE
    const float gf5 = 2.0f*(-ck*bn2 + sc4*bn3 - sc6*bn4 - offset*(-ck*rr5 + sc4*rr7 - sc6*rr9));
#else
    const float gf5 = 2.0f*(-ck*bn2 + sc4*bn3 - sc6*bn4);
#endif
    fx += gf5*qir1;
    fy += gf5*qir2;
    fz += gf5*qir3;

    // Component along dipole J.
#ifdef APPLY_SCALE
    const float gf3 = ci*bn1 + sc3*bn2 + sc5*bn3 - offset*(ci*rr3 + sc3*rr5 + sc5*rr7);
#else
    const float gf3 = ci*bn1 + sc3*bn2 + sc5*bn3;
#endif
    fx += gf3*dk1;
    fy += gf3*dk2;
    fz += gf3*dk3;

    // Component along Q(J).r.
#ifdef APPLY_SCALE
    const float gf6 = 2.0f*(-ci*bn2 - sc3*bn3 - sc5*bn4 - offset*(-ci*rr5 - sc3*rr7 - sc5*rr9));
#else
    const float gf6 = 2.0f*(-ci*bn2 - sc3*bn3 - sc5*bn4);
#endif
    fx += gf6*qkr1;
    fy += gf6*qkr2;
    fz += gf6*qkr3;

    // The result replaces whatever force[] held on entry.
    force[0] = fx;
    force[1] = fy;
    force[2] = fz;
}
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h
deleted
100644 → 0
View file @
352e2fc7
/**
 * Induced-dipole pair force and energy (PME direct-space, part F2).
 *
 * Combines the permanent multipoles of each particle with the induced dipoles
 * (inducedDipole and inducedDipoleP) of the other, applying Thole damping
 * built from the particles' damp and thole fields.
 *
 * @param atomI          first particle; q, labFrameDipole, labFrameQuadrupole,
 *                       inducedDipole, inducedDipoleP, damp and thole are read
 * @param atomJ          second particle; same fields are read
 * @param delta          x/y/z are the separation components; w is read as rr1
 *                       (presumably 1/r -- confirm against the caller)
 * @param bn             x, y, z, w are used as the coefficients bn1..bn4
 * @param forceFactor    multiplier applied to the energy contributions only
 * @param scalingFactors (APPLY_SCALE builds only) table indexed with
 *                       PScaleIndex, DScaleIndex and UScaleIndex
 * @param force          in/out: the pair force is ADDED to force[0..2]
 * @param energy         in/out: the pair energy is added to *energy
 *
 * Terms guarded by cAmoebaSim.polarizationType == 0 couple the induced dipoles
 * of both particles (presumably the mutual-polarization mode -- confirm).
 *
 * Implementation note: expf(damp), rr3 and the Thole gradient terms are each
 * evaluated once and reused; nothing is re-declared in an inner scope.
 */
static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2, _kernel )(
        PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ,
        float4 delta, float4 bn, float forceFactor,
#ifdef APPLY_SCALE
        const float* scalingFactors,
#endif
        float force[3], float* energy ){

    const float xr  = delta.x;
    const float yr  = delta.y;
    const float zr  = delta.z;
    const float rr1 = delta.w;

    // Permanent multipoles of atom I. The last quadrupole diagonal entry is
    // rebuilt as -(Q[0] + Q[3]).
    const float ci  = atomI.q;
    const float di1 = atomI.labFrameDipole[0];
    const float di2 = atomI.labFrameDipole[1];
    const float di3 = atomI.labFrameDipole[2];
    const float qi1 = atomI.labFrameQuadrupole[0];
    const float qi2 = atomI.labFrameQuadrupole[1];
    const float qi3 = atomI.labFrameQuadrupole[2];
    const float qi5 = atomI.labFrameQuadrupole[3];
    const float qi6 = atomI.labFrameQuadrupole[4];
    const float qi9 = -(atomI.labFrameQuadrupole[0] + atomI.labFrameQuadrupole[3]);

    const float bn1 = bn.x;
    const float bn2 = bn.y;
    const float bn3 = bn.z;
    const float bn4 = bn.w;

    // Thole damping exponent. damp == 0 afterwards means "no damping": either
    // one of the particles has damp == 0, or the exponent fell below -50.
    float damp = atomI.damp*atomJ.damp;
    if( damp != 0.0f ){
        const float pgamma = atomI.thole < atomJ.thole ? atomI.thole : atomJ.thole;
        const float ratio  = 1.0f/(rr1*damp);
        damp = -pgamma*ratio*ratio*ratio;
        damp = damp < -50.0f ? 0.0f : damp;
    }

    // Single evaluation of the exponential, shared by every damping factor.
    const float expdamp = expf(damp);
    const float scale3  = (damp == 0.0f) ? 1.0f : (1.0f - expdamp);
    const float scale5  = (damp == 0.0f) ? 1.0f : (1.0f - (1.0f - damp)*expdamp);
    const float scale7  = (damp == 0.0f) ? 1.0f : (1.0f - (1.0f - damp + 0.6f*damp*damp)*expdamp);

    // Powers of rr1 with their numeric prefactors.
    const float rr2 = rr1*rr1;
    const float rr3 = rr1*rr1*rr1;
    const float rr5 = 3.0f*rr1*rr2*rr2;
    const float rr7 = 15.0f*rr3*rr3*rr1;

#ifdef APPLY_SCALE
    const float psc3 = rr3*(1.0f - scale3*scalingFactors[PScaleIndex]);
    const float dsc3 = rr3*(1.0f - scale3*scalingFactors[DScaleIndex]);
    const float usc3 = rr3*(1.0f - scale3*scalingFactors[UScaleIndex]);
    const float psc5 = rr5*(1.0f - scale5*scalingFactors[PScaleIndex]);
    const float dsc5 = rr5*(1.0f - scale5*scalingFactors[DScaleIndex]);
    const float usc5 = rr5*(1.0f - scale5*scalingFactors[UScaleIndex]);
    const float psc7 = rr7*(1.0f - scale7*scalingFactors[PScaleIndex]);
    const float dsc7 = rr7*(1.0f - scale7*scalingFactors[DScaleIndex]);
#else
    const float psc3 = rr3*(1.0f - scale3);
    const float psc5 = rr5*(1.0f - scale5);
    const float psc7 = rr7*(1.0f - scale7);
#endif

    // Gradient of the damping factors along the separation vector. Only
    // consumed inside the damp != 0 branches below.
    const float dampGrad3 = -1.5f*damp*expdamp*rr1*rr1;
    const float dampGrad5 = -damp;
    const float dampGrad7 = -0.2f - 0.6f*damp;
    const float ddsc31 = dampGrad3*xr;
    const float ddsc32 = dampGrad3*yr;
    const float ddsc33 = dampGrad3*zr;
    const float ddsc51 = dampGrad5*ddsc31;
    const float ddsc52 = dampGrad5*ddsc32;
    const float ddsc53 = dampGrad5*ddsc33;
    const float ddsc71 = dampGrad7*ddsc51;
    const float ddsc72 = dampGrad7*ddsc52;
    const float ddsc73 = dampGrad7*ddsc53;

    // ---------------------------------------------------------------------
    // Multipoles of I acting on the induced dipoles of J
    // ---------------------------------------------------------------------

    const float qiuk1  = qi1*atomJ.inducedDipole[0]  + qi2*atomJ.inducedDipole[1]  + qi3*atomJ.inducedDipole[2];
    const float qiukp1 = qi1*atomJ.inducedDipoleP[0] + qi2*atomJ.inducedDipoleP[1] + qi3*atomJ.inducedDipoleP[2];
    const float qiuk2  = qi2*atomJ.inducedDipole[0]  + qi5*atomJ.inducedDipole[1]  + qi6*atomJ.inducedDipole[2];
    const float qiukp2 = qi2*atomJ.inducedDipoleP[0] + qi5*atomJ.inducedDipoleP[1] + qi6*atomJ.inducedDipoleP[2];
    const float qiuk3  = qi3*atomJ.inducedDipole[0]  + qi6*atomJ.inducedDipole[1]  + qi9*atomJ.inducedDipole[2];
    const float qiukp3 = qi3*atomJ.inducedDipoleP[0] + qi6*atomJ.inducedDipoleP[1] + qi9*atomJ.inducedDipoleP[2];

    float ftm21 = -bn2*(qiuk1 + qiukp1);
    float ftm22 = -bn2*(qiuk2 + qiukp2);
    float ftm23 = -bn2*(qiuk3 + qiukp3);
#ifdef APPLY_SCALE
    ftm21 += qiuk1*psc5 + qiukp1*dsc5;
    ftm22 += qiuk2*psc5 + qiukp2*dsc5;
    ftm23 += qiuk3*psc5 + qiukp3*dsc5;
#else
    ftm21 += (qiuk1 + qiukp1)*psc5;
    ftm22 += (qiuk2 + qiukp2)*psc5;
    ftm23 += (qiuk3 + qiukp3)*psc5;
#endif

    const float qir1 = qi1*xr + qi2*yr + qi3*zr;
    const float qir2 = qi2*xr + qi5*yr + qi6*zr;
    const float qir3 = qi3*xr + qi6*yr + qi9*zr;
    const float sc3  = di1*xr  + di2*yr  + di3*zr;
    const float sc5  = qir1*xr + qir2*yr + qir3*zr;
    const float gfi3 = ci*bn1 + sc3*bn2 + sc5*bn3;

    float prefactor1 = 0.5f*(ci*psc3 + sc3*psc5 + sc5*psc7 - gfi3);
    ftm21 -= prefactor1*atomJ.inducedDipole[0];
    ftm22 -= prefactor1*atomJ.inducedDipole[1];
    ftm23 -= prefactor1*atomJ.inducedDipole[2];

    // The unscaled build reuses the same prefactor for inducedDipoleP.
#ifdef APPLY_SCALE
    prefactor1 = 0.5f*(ci*dsc3 + sc3*dsc5 + sc5*dsc7 - gfi3);
#endif
    ftm21 -= prefactor1*atomJ.inducedDipoleP[0];
    ftm22 -= prefactor1*atomJ.inducedDipoleP[1];
    ftm23 -= prefactor1*atomJ.inducedDipoleP[2];

    // Projections of J's induced dipoles on the separation vector. These stay
    // unmodified so they can be reused for sci34 further down.
    const float sci4  = atomJ.inducedDipole[0]*xr  + atomJ.inducedDipole[1]*yr  + atomJ.inducedDipole[2]*zr;
    const float scip4 = atomJ.inducedDipoleP[0]*xr + atomJ.inducedDipoleP[1]*yr + atomJ.inducedDipoleP[2]*zr;

    *energy += forceFactor*0.5f*sci4*((psc3 - bn1)*ci + (psc5 - bn2)*sc3 + (psc7 - bn3)*sc5);

    if( cAmoebaSim.polarizationType == 0 ){
#ifdef APPLY_SCALE
        prefactor1 = 0.5f*(bn2 - usc5);
#else
        prefactor1 = 0.5f*(bn2 - psc5);
#endif
        ftm21 += prefactor1*(sci4*atomI.inducedDipoleP[0] + scip4*atomI.inducedDipole[0]);
        ftm22 += prefactor1*(sci4*atomI.inducedDipoleP[1] + scip4*atomI.inducedDipole[1]);
        ftm23 += prefactor1*(sci4*atomI.inducedDipoleP[2] + scip4*atomI.inducedDipole[2]);
    }

#ifdef APPLY_SCALE
    prefactor1 = 0.5f*(bn2*(sci4 + scip4) - (sci4*psc5 + scip4*dsc5));
#else
    // Without per-pair scaling both induced dipoles share one weight, so
    // their projections are summed first.
    const float sci4Sum = sci4 + scip4;
    prefactor1 = 0.5f*sci4Sum*(bn2 - psc5);
#endif
    ftm21 += prefactor1*di1;
    ftm22 += prefactor1*di2;
    ftm23 += prefactor1*di3;

#ifdef APPLY_SCALE
    const float gfi5 = bn3*(sci4 + scip4) - (sci4*psc7 + scip4*dsc7);
#else
    const float gfi5 = sci4Sum*(bn3 - psc7);
#endif
    ftm21 += gfi5*qir1;
    ftm22 += gfi5*qir2;
    ftm23 += gfi5*qir3;

    const float sci7  = qir1*atomJ.inducedDipole[0]  + qir2*atomJ.inducedDipole[1]  + qir3*atomJ.inducedDipole[2];
    *energy += forceFactor*(bn2 - psc5)*sci7;
    const float scip7 = qir1*atomJ.inducedDipoleP[0] + qir2*atomJ.inducedDipoleP[1] + qir3*atomJ.inducedDipoleP[2];

    // gli*/glip* are reassigned in the second half, hence not const.
#ifdef APPLY_SCALE
    float gli1  = -ci*sci4;
    float gli2  = -sc3*sci4 + 2.0f*sci7;
    float gli3  = -sci4*sc5;
    float glip1 = -ci*scip4;
    float glip2 = -sc3*scip4 + 2.0f*scip7;
    float glip3 = -scip4*sc5;
#else
    float gli1  = -ci*sci4Sum;
    float gli2  = -sc3*sci4Sum + 2.0f*(sci7 + scip7);
    float gli3  = -sci4Sum*sc5;
#endif

#ifdef APPLY_SCALE
    float gfi1 = (bn2*(gli1 + glip1) + bn3*(gli2 + glip2) + bn4*(gli3 + glip3));
    gfi1 -= rr2*(3.0f*(gli1*psc3 + glip1*dsc3) + 5.0f*(gli2*psc5 + glip2*dsc5) + 7.0f*(gli3*psc7 + glip3*dsc7));
#else
    float gfi1 = bn2*gli1 + bn3*gli2 + bn4*gli3;
    gfi1 -= rr2*(3.0f*gli1*psc3 + 5.0f*gli2*psc5 + 7.0f*gli3*psc7);
#endif
    gfi1 *= 0.5f;
    ftm21 += gfi1*xr;
    ftm22 += gfi1*yr;
    ftm23 += gfi1*zr;

    // Damping-gradient correction for the I-multipole / J-induced terms.
    if( damp != 0.0f ){
#ifdef APPLY_SCALE
        const float w3 = (gli1*scalingFactors[PScaleIndex] + glip1*scalingFactors[DScaleIndex]);
        const float w5 = (3.0f*rr1*rr1)*(gli2*scalingFactors[PScaleIndex] + glip2*scalingFactors[DScaleIndex]);
        const float w7 = (15.0f*rr3*rr1)*(gli3*scalingFactors[PScaleIndex] + glip3*scalingFactors[DScaleIndex]);
#else
        const float w3 = gli1;
        const float w5 = (3.0f*rr1*rr1)*gli2;
        const float w7 = (15.0f*rr3*rr1)*gli3;
#endif
        ftm21 -= rr3*(w3*ddsc31 + w5*ddsc51 + w7*ddsc71);
        ftm22 -= rr3*(w3*ddsc32 + w5*ddsc52 + w7*ddsc72);
        ftm23 -= rr3*(w3*ddsc33 + w5*ddsc53 + w7*ddsc73);
    }

    // ---------------------------------------------------------------------
    // Multipoles of J acting on the induced dipoles of I
    // ---------------------------------------------------------------------

    const float qk1 = atomJ.labFrameQuadrupole[0];
    const float qk2 = atomJ.labFrameQuadrupole[1];
    const float qk3 = atomJ.labFrameQuadrupole[2];
    const float qk5 = atomJ.labFrameQuadrupole[3];
    const float qk6 = atomJ.labFrameQuadrupole[4];
    const float qk9 = -(qk1 + qk5);

    const float qkui1  = qk1*atomI.inducedDipole[0]  + qk2*atomI.inducedDipole[1]  + qk3*atomI.inducedDipole[2];
    const float qkuip1 = qk1*atomI.inducedDipoleP[0] + qk2*atomI.inducedDipoleP[1] + qk3*atomI.inducedDipoleP[2];
    const float qkui2  = qk2*atomI.inducedDipole[0]  + qk5*atomI.inducedDipole[1]  + qk6*atomI.inducedDipole[2];
    const float qkuip2 = qk2*atomI.inducedDipoleP[0] + qk5*atomI.inducedDipoleP[1] + qk6*atomI.inducedDipoleP[2];
    const float qkui3  = qk3*atomI.inducedDipole[0]  + qk6*atomI.inducedDipole[1]  + qk9*atomI.inducedDipole[2];
    const float qkuip3 = qk3*atomI.inducedDipoleP[0] + qk6*atomI.inducedDipoleP[1] + qk9*atomI.inducedDipoleP[2];

    // Each component receives its += before its -=, as in the original sequence.
    ftm21 += bn2*(qkui1 + qkuip1);
#ifdef APPLY_SCALE
    ftm21 -= (qkui1*psc5 + qkuip1*dsc5);
#else
    ftm21 -= (qkui1 + qkuip1)*psc5;
#endif
    ftm22 += bn2*(qkui2 + qkuip2);
#ifdef APPLY_SCALE
    ftm22 -= (qkui2*psc5 + qkuip2*dsc5);
#else
    ftm22 -= (qkui2 + qkuip2)*psc5;
#endif
    ftm23 += bn2*(qkui3 + qkuip3);
#ifdef APPLY_SCALE
    ftm23 -= (qkui3*psc5 + qkuip3*dsc5);
#else
    ftm23 -= (qkui3 + qkuip3)*psc5;
#endif

    const float qkr1 = qk1*xr + qk2*yr + qk3*zr;
    const float qkr2 = qk2*xr + qk5*yr + qk6*zr;
    const float qkr3 = qk3*xr + qk6*yr + qk9*zr;

    const float dk1 = atomJ.labFrameDipole[0];
    const float dk2 = atomJ.labFrameDipole[1];
    const float dk3 = atomJ.labFrameDipole[2];

    const float sc4 = dk1*xr  + dk2*yr  + dk3*zr;
    const float sc6 = qkr1*xr + qkr2*yr + qkr3*zr;

    const float ck   = atomJ.q;
    const float gfi2 = (-ck*bn1 + sc4*bn2 - sc6*bn3);

    prefactor1 = 0.5f*(ck*psc3 - sc4*psc5 + sc6*psc7 + gfi2);
    ftm21 += prefactor1*atomI.inducedDipole[0];
    ftm22 += prefactor1*atomI.inducedDipole[1];
    ftm23 += prefactor1*atomI.inducedDipole[2];

#ifdef APPLY_SCALE
    prefactor1 = 0.5f*(ck*dsc3 - sc4*dsc5 + sc6*dsc7 + gfi2);
#endif
    ftm21 += prefactor1*atomI.inducedDipoleP[0];
    ftm22 += prefactor1*atomI.inducedDipoleP[1];
    ftm23 += prefactor1*atomI.inducedDipoleP[2];

    // sci3 is summed with scip3 further down in the unscaled build.
    float sci3 = atomI.inducedDipole[0]*xr + atomI.inducedDipole[1]*yr + atomI.inducedDipole[2]*zr;
    *energy += forceFactor*0.5f*sci3*(ck*(bn1 - psc3) - sc4*(bn2 - psc5) + sc6*(bn3 - psc7));
    const float scip3 = atomI.inducedDipoleP[0]*xr + atomI.inducedDipoleP[1]*yr + atomI.inducedDipoleP[2]*zr;

    // Induced(I)-induced(J) coupling; gfi1 restarts from this term (or zero).
    float sci34 = 0.0f;
    if( cAmoebaSim.polarizationType == 0 ){
#ifdef APPLY_SCALE
        prefactor1 = 0.5f*(bn2 - usc5);
#else
        prefactor1 = 0.5f*(bn2 - psc5);
#endif
        ftm21 += prefactor1*(sci3*atomJ.inducedDipoleP[0] + scip3*atomJ.inducedDipole[0]);
        ftm22 += prefactor1*(sci3*atomJ.inducedDipoleP[1] + scip3*atomJ.inducedDipole[1]);
        ftm23 += prefactor1*(sci3*atomJ.inducedDipoleP[2] + scip3*atomJ.inducedDipole[2]);

        sci34 = (sci3*scip4 + scip3*sci4);
#ifdef APPLY_SCALE
        gfi1 = sci34*(usc5*(5.0f*rr1*rr1) - bn3);
#else
        gfi1 = sci34*(psc5*(5.0f*rr1*rr1) - bn3);
#endif
    } else {
        gfi1 = 0.0f;
    }

#ifdef APPLY_SCALE
    prefactor1 = 0.5f*(bn2*(sci3 + scip3) - (sci3*psc5 + scip3*dsc5));
#else
    sci3 += scip3;
    prefactor1 = 0.5f*sci3*(bn2 - psc5);
#endif
    ftm21 += prefactor1*dk1;
    ftm22 += prefactor1*dk2;
    ftm23 += prefactor1*dk3;

#ifdef APPLY_SCALE
    const float gfi6 = -bn3*(sci3 + scip3) + (sci3*psc7 + scip3*dsc7);
#else
    const float gfi6 = sci3*(psc7 - bn3);
#endif
    ftm21 += gfi6*qkr1;
    ftm22 += gfi6*qkr2;
    ftm23 += gfi6*qkr3;

    float sci1 = atomI.inducedDipole[0]*dk1 + atomI.inducedDipole[1]*dk2 + atomI.inducedDipole[2]*dk3 + di1*atomJ.inducedDipole[0] + di2*atomJ.inducedDipole[1] + di3*atomJ.inducedDipole[2];
    *energy += forceFactor*0.5f*(sci1*(bn1 - psc3));

    float sci8 = qkr1*atomI.inducedDipole[0] + qkr2*atomI.inducedDipole[1] + qkr3*atomI.inducedDipole[2];
    *energy -= forceFactor*sci8*(bn2 - psc5);

    const float scip1 = atomI.inducedDipoleP[0]*dk1 + atomI.inducedDipoleP[1]*dk2 + atomI.inducedDipoleP[2]*dk3 + di1*atomJ.inducedDipoleP[0] + di2*atomJ.inducedDipoleP[1] + di3*atomJ.inducedDipoleP[2];
#ifndef APPLY_SCALE
    sci1 += scip1;
#endif

    const float scip2 = atomI.inducedDipole[0]*atomJ.inducedDipoleP[0] + atomI.inducedDipole[1]*atomJ.inducedDipoleP[1] + atomI.inducedDipole[2]*atomJ.inducedDipoleP[2] + atomJ.inducedDipole[0]*atomI.inducedDipoleP[0] + atomJ.inducedDipole[1]*atomI.inducedDipoleP[1] + atomJ.inducedDipole[2]*atomI.inducedDipoleP[2];

    const float scip8 = qkr1*atomI.inducedDipoleP[0] + qkr2*atomI.inducedDipoleP[1] + qkr3*atomI.inducedDipoleP[2];
#ifndef APPLY_SCALE
    sci8 += scip8;
#endif

    gli1 = ck*sci3 + sci1;
    gli2 = -(sci3*sc4 + 2.0f*sci8);
    gli3 = sci3*sc6;
#ifdef APPLY_SCALE
    glip1 = ck*scip3 + scip1;
    glip2 = -(scip3*sc4 + 2.0f*scip8);
    glip3 = scip3*sc6;
#endif

#ifdef APPLY_SCALE
    gfi1 += (bn2*(gli1 + glip1) + bn3*(gli2 + glip2) + bn4*(gli3 + glip3));
    gfi1 -= rr2*(3.0f*(gli1*psc3 + glip1*dsc3) + 5.0f*(gli2*psc5 + glip2*dsc5) + 7.0f*(gli3*psc7 + glip3*dsc7));
#else
    gfi1 += (bn2*gli1 + bn3*gli2 + bn4*gli3);
    gfi1 -= rr2*(3.0f*gli1*psc3 + 5.0f*gli2*psc5 + 7.0f*gli3*psc7);
#endif

    if( cAmoebaSim.polarizationType == 0 ){
#ifdef APPLY_SCALE
        gfi1 += scip2*(bn2 - (3.0f*rr1*rr1)*usc3);
#else
        gfi1 += scip2*(bn2 - (3.0f*rr1*rr1)*psc3);
#endif
    }

    gfi1 *= 0.5f;
    ftm21 += gfi1*xr;
    ftm22 += gfi1*yr;
    ftm23 += gfi1*zr;

    // Damping-gradient correction for the J-multipole / I-induced terms and,
    // when enabled, for the induced-induced coupling.
    if( damp != 0.0f ){
#ifdef APPLY_SCALE
        const float w3 = gli1*scalingFactors[PScaleIndex] + glip1*scalingFactors[DScaleIndex];
        const float w5 = (3.0f*rr1*rr1)*(gli2*scalingFactors[PScaleIndex] + glip2*scalingFactors[DScaleIndex]);
        const float w7 = (15.0f*rr3*rr1)*(gli3*scalingFactors[PScaleIndex] + glip3*scalingFactors[DScaleIndex]);
#else
        const float w3 = gli1;
        const float w5 = (3.0f*rr1*rr1)*gli2;
        const float w7 = (15.0f*rr3*rr1)*(gli3);
#endif
        ftm21 -= rr3*(w3*ddsc31 + w5*ddsc51 + w7*ddsc71);
        ftm22 -= rr3*(w3*ddsc32 + w5*ddsc52 + w7*ddsc72);
        ftm23 -= rr3*(w3*ddsc33 + w5*ddsc53 + w7*ddsc73);

        if( cAmoebaSim.polarizationType == 0 ){
#ifdef APPLY_SCALE
            const float u3 = scalingFactors[UScaleIndex]*scip2;
            const float u5 = -(3.0f*rr1*rr1)*scalingFactors[UScaleIndex]*sci34;
#else
            const float u3 = scip2;
            const float u5 = -(3.0f*rr1*rr1)*sci34;
#endif
            ftm21 -= rr3*(u3*ddsc31 + u5*ddsc51);
            ftm22 -= rr3*(u3*ddsc32 + u5*ddsc52);
            ftm23 -= rr3*(u3*ddsc33 + u5*ddsc53);
        }
    }

    // Accumulate onto the caller's running force.
    force[0] += ftm21;
    force[1] += ftm22;
    force[2] += ftm23;
}
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT1.h
deleted
100644 → 0
View file @
352e2fc7
/**
 * Permanent-multipole torque on atom I (PME direct-space, part T1).
 *
 * Evaluates the torque that the permanent charge, dipole and quadrupole of
 * atomJ exert on the permanent dipole and quadrupole of atomI, and adds it to
 * atomI.torque[0..2].
 *
 * @param atomI          particle receiving the torque; labFrameDipole and
 *                       labFrameQuadrupole are read, torque is accumulated
 * @param atomJ          source particle; q, labFrameDipole and
 *                       labFrameQuadrupole are read
 * @param delta          x/y/z are the separation components; w is read as rr1
 *                       when APPLY_SCALE is defined (presumably 1/r -- confirm)
 * @param bn             x, y, z, w are used as the coefficients bn1..bn4
 * @param scalingFactors (APPLY_SCALE builds only) table indexed with MScaleIndex
 */
static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnT1, _kernel )(
        PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ,
        const float4 delta, const float4 bn
#ifdef APPLY_SCALE
        , const float* scalingFactors
#endif
        ){

    const float xr = delta.x;
    const float yr = delta.y;
    const float zr = delta.z;

    // Permanent dipole and quadrupole of atom I; the last quadrupole diagonal
    // entry is rebuilt as -(Q[0] + Q[3]).
    const float di1 = atomI.labFrameDipole[0];
    const float di2 = atomI.labFrameDipole[1];
    const float di3 = atomI.labFrameDipole[2];
    const float qi1 = atomI.labFrameQuadrupole[0];
    const float qi2 = atomI.labFrameQuadrupole[1];
    const float qi3 = atomI.labFrameQuadrupole[2];
    const float qi5 = atomI.labFrameQuadrupole[3];
    const float qi6 = atomI.labFrameQuadrupole[4];
    const float qi9 = -(atomI.labFrameQuadrupole[0] + atomI.labFrameQuadrupole[3]);

    // Permanent multipoles of atom J.
    const float ck  = atomJ.q;
    const float dk1 = atomJ.labFrameDipole[0];
    const float dk2 = atomJ.labFrameDipole[1];
    const float dk3 = atomJ.labFrameDipole[2];
    const float qk1 = atomJ.labFrameQuadrupole[0];
    const float qk2 = atomJ.labFrameQuadrupole[1];
    const float qk3 = atomJ.labFrameQuadrupole[2];
    const float qk5 = atomJ.labFrameQuadrupole[3];
    const float qk6 = atomJ.labFrameQuadrupole[4];
    const float qk9 = -(atomJ.labFrameQuadrupole[0] + atomJ.labFrameQuadrupole[3]);

    const float bn1 = bn.x;
    const float bn2 = bn.y;
    const float bn3 = bn.z;
    const float bn4 = bn.w;

    // Radial prefactors. In the scaled build each bn coefficient is offset by
    // (1 - mscale) times the matching rr power.
#ifdef APPLY_SCALE
    const float rr1   = delta.w;
    const float rr2   = rr1*rr1;
    const float rr3   = rr1*rr2;
    const float rr5   = 3.0f*rr3*rr2;
    const float rr7   = 5.0f*rr5*rr2;
    const float rr9   = 7.0f*rr7*rr2;
    const float scale = 1.0f - scalingFactors[MScaleIndex];
    const float pfBn1 = scale*rr3 - bn1;
    const float pfBn3 = 4.0f*(bn3 - scale*rr7);
    const float pfBn2 = 2.0f*(bn2 - scale*rr5);
#else
    const float pfBn1 = -bn1;
    const float pfBn3 = 4.0f*bn3;
    const float pfBn2 = 2.0f*bn2;
#endif

    // Quadrupole tensors applied to the separation vector and to each other.
    const float qir1 = qi1*xr + qi2*yr + qi3*zr;
    const float qir2 = qi2*xr + qi5*yr + qi6*zr;
    const float qir3 = qi3*xr + qi6*yr + qi9*zr;
    const float qkr1 = qk1*xr + qk2*yr + qk3*zr;
    const float qkr2 = qk2*xr + qk5*yr + qk6*zr;
    const float qkr3 = qk3*xr + qk6*yr + qk9*zr;

    const float qiqkr1 = qi1*qkr1 + qi2*qkr2 + qi3*qkr3;
    const float qiqkr2 = qi2*qkr1 + qi5*qkr2 + qi6*qkr3;
    const float qiqkr3 = qi3*qkr1 + qi6*qkr2 + qi9*qkr3;

    const float qidk1 = qi1*dk1 + qi2*dk2 + qi3*dk3;
    const float qidk2 = qi2*dk1 + qi5*dk2 + qi6*dk3;
    const float qidk3 = qi3*dk1 + qi6*dk2 + qi9*dk3;

    // Scalar projections of J's dipole and quadrupole on the separation.
    const float sc4 = dk1*xr  + dk2*yr  + dk3*zr;
    const float sc6 = qkr1*xr + qkr2*yr + qkr3*zr;

    const float gf2 = -ck*bn1 + sc4*bn2 - sc6*bn3;
    const float gf5 = (-ck*bn2 + sc4*bn3 - sc6*bn4);
#ifdef APPLY_SCALE
    const float gfr2  = -ck*rr3 + sc4*rr5 - sc6*rr7;
    const float gfr5  = (-ck*rr5 + sc4*rr7 - sc6*rr9);
    const float pfGf2 = (gf2 - scale*gfr2);
    const float pfGf5 = 2.0f*(gf5 - scale*gfr5);
#else
    const float pfGf2 = gf2;
    const float pfGf5 = 2.0f*gf5;
#endif

    // --- x component -------------------------------------------------------
    const float dixdk1   = di2*dk3 - di3*dk2;
    const float rxqikr1  = yr*qiqkr3 - zr*qiqkr2;
    const float qkrxqir1 = qkr2*qir3 - qkr3*qir2;
    const float dixqkr1  = di2*qkr3 - di3*qkr2;
    const float dkxqir1  = dk2*qir3 - dk3*qir2;
    const float rxqidk1  = yr*qidk3 - zr*qidk2;
    const float qixqk1   = qi2*qk3 + qi5*qk6 + qi6*qk9 - qi3*qk2 - qi6*qk5 - qi9*qk6;
    const float rxqir1   = yr*qir3 - zr*qir2;

    float tx = pfBn1*dixdk1;
    tx -= pfBn3*(rxqikr1 + qkrxqir1);
    tx += pfBn2*(dixqkr1 + dkxqir1 + rxqidk1 - 2.0f*qixqk1);
    tx += pfGf2*(di2*zr - di3*yr);
    tx -= pfGf5*rxqir1;

    // --- y component -------------------------------------------------------
    const float dixdk2   = di3*dk1 - di1*dk3;
    const float rxqikr2  = zr*qiqkr1 - xr*qiqkr3;
    const float qkrxqir2 = qkr3*qir1 - qkr1*qir3;
    const float dixqkr2  = di3*qkr1 - di1*qkr3;
    const float dkxqir2  = dk3*qir1 - dk1*qir3;
    const float rxqidk2  = zr*qidk1 - xr*qidk3;
    const float qixqk2   = qi3*qk1 + qi6*qk2 + qi9*qk3 - qi1*qk3 - qi2*qk6 - qi3*qk9;
    const float rxqir2   = zr*qir1 - xr*qir3;

    float ty = pfBn1*dixdk2;
    ty -= pfBn3*(rxqikr2 + qkrxqir2);
    ty += pfBn2*(dixqkr2 + dkxqir2 + rxqidk2 - 2.0f*qixqk2);
    ty += pfGf2*(di3*xr - di1*zr);
    ty -= pfGf5*rxqir2;

    // --- z component -------------------------------------------------------
    const float dixdk3   = di1*dk2 - di2*dk1;
    const float rxqikr3  = xr*qiqkr2 - yr*qiqkr1;
    const float qkrxqir3 = qkr1*qir2 - qkr2*qir1;
    const float dixqkr3  = di1*qkr2 - di2*qkr1;
    const float dkxqir3  = dk1*qir2 - dk2*qir1;
    const float rxqidk3  = xr*qidk2 - yr*qidk1;
    const float qixqk3   = qi1*qk2 + qi2*qk5 + qi3*qk6 - qi2*qk1 - qi5*qk2 - qi6*qk3;
    const float rxqir3   = xr*qir2 - yr*qir1;

    float tz = pfBn1*dixdk3;
    tz -= pfBn3*(rxqikr3 + qkrxqir3);
    tz += pfBn2*(dixqkr3 + dkxqir3 + rxqidk3 - 2.0f*qixqk3);
    tz += pfGf2*(di1*yr - di2*xr);
    tz -= pfGf5*rxqir3;

    // Accumulate onto atom I's running torque.
    atomI.torque[0] += tx;
    atomI.torque[1] += ty;
    atomI.torque[2] += tz;
}
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT2.h
deleted
100644 → 0
View file @
352e2fc7
/**
 * Accumulate into atomI.torque the torque contribution built from atomI's
 * permanent dipole/quadrupole and atomJ's induced dipoles (inducedDipole and
 * inducedDipoleP).
 *
 * @param atomI           particle whose torque[0..2] is incremented
 * @param atomJ           particle supplying inducedDipole / inducedDipoleP, damp and thole
 * @param delta           x,y,z: separation components; w: used as rr1
 *                        (presumably 1/r -- confirm against the caller)
 * @param bn              x,y,z are used as bn1, bn2, bn3
 * @param scalingFactors  (APPLY_SCALE builds only) indexed with DScaleIndex and PScaleIndex
 */
static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnT2, _kernel )(
    PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ,
    const float4 delta, const float4 bn
#ifdef APPLY_SCALE
    , const float* scalingFactors
#endif
){
    const float rx  = delta.x;
    const float ry  = delta.y;
    const float rz  = delta.z;
    const float rr1 = delta.w;

    // permanent dipole of atom I
    const float dipX = atomI.labFrameDipole[0];
    const float dipY = atomI.labFrameDipole[1];
    const float dipZ = atomI.labFrameDipole[2];

    // permanent quadrupole of atom I; the last diagonal element is rebuilt
    // from the other two (negative of their sum) rather than read from [5]
    const float qXX = atomI.labFrameQuadrupole[0];
    const float qXY = atomI.labFrameQuadrupole[1];
    const float qXZ = atomI.labFrameQuadrupole[2];
    const float qYY = atomI.labFrameQuadrupole[3];
    const float qYZ = atomI.labFrameQuadrupole[4];
    const float qZZ = -(atomI.labFrameQuadrupole[0] + atomI.labFrameQuadrupole[3]);

    // induced dipoles of atom J (both sets)
    const float uX  = atomJ.inducedDipole[0];
    const float uY  = atomJ.inducedDipole[1];
    const float uZ  = atomJ.inducedDipole[2];
    const float upX = atomJ.inducedDipoleP[0];
    const float upY = atomJ.inducedDipoleP[1];
    const float upZ = atomJ.inducedDipoleP[2];

    const float bn1 = bn.x;
    const float bn2 = bn.y;
    const float bn3 = bn.z;

    // Thole damping of the scale factors; they stay at 1 when either damp
    // value is zero or when the exponent is below -50
    float scale3 = 1.0f;
    float scale5 = 1.0f;
    float scale7 = 1.0f;
    float damp   = atomI.damp*atomJ.damp;
    if( damp != 0.0f ){
        float pgamma;
        if( atomI.thole < atomJ.thole ){
            pgamma = atomI.thole;
        } else {
            pgamma = atomJ.thole;
        }
        const float ratio = 1.0f/(rr1*damp);
        damp = -pgamma*ratio*ratio*ratio;
        if( damp > -50.0f ){
            const float expdamp = expf( damp );
            scale3 = 1.0f - expdamp;
            scale5 = 1.0f - (1.0f - damp)*expdamp;
            scale7 = 1.0f - (1.0f - damp + 0.6f*damp*damp)*expdamp;
        }
    }

    const float rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
    const float dsc3 = rr3*(1.0f - scale3*scalingFactors[DScaleIndex]);
    const float dsc5 = (3.0f*rr3*rr1*rr1)*(1.0f - scale5*scalingFactors[DScaleIndex]);
    const float dsc7 = (15.0f*rr3*rr3*rr1)*(1.0f - scale7*scalingFactors[DScaleIndex]);

    const float psc3 = rr3*(1.0f - scale3*scalingFactors[PScaleIndex]);
    const float psc5 = (3.0f*rr3*rr1*rr1)*(1.0f - scale5*scalingFactors[PScaleIndex]);
    const float psc7 = (15.0f*rr3*rr3*rr1)*(1.0f - scale7*scalingFactors[PScaleIndex]);
#else
    const float psc3 = rr3*(1.0f - scale3);
    const float psc5 = (3.0f*rr3*rr1*rr1)*(1.0f - scale5);
    const float psc7 = (15.0f*rr3*rr3*rr1)*(1.0f - scale7);
#endif

    // --- term 1: (dipole I) x (induced dipole J) ---------------------------
    float weightP = 0.5f*(psc3 - bn1);
#ifdef APPLY_SCALE
    float weightD = 0.5f*(dsc3 - bn1);
#endif

    const float dxuX  = dipY*uZ  - dipZ*uY;
    const float dxupX = dipY*upZ - dipZ*upY;
    const float dxuY  = dipZ*uX  - dipX*uZ;
    const float dxupY = dipZ*upX - dipX*upZ;
    const float dxuZ  = dipX*uY  - dipY*uX;
    const float dxupZ = dipX*upY - dipY*upX;

#ifdef APPLY_SCALE
    float tX = weightP*dxuX + weightD*dxupX;
    float tY = weightP*dxuY + weightD*dxupY;
    float tZ = weightP*dxuZ + weightD*dxupZ;
#else
    float tX = weightP*(dxuX + dxupX);
    float tY = weightP*(dxuY + dxupY);
    float tZ = weightP*(dxuZ + dxupZ);
#endif

    // --- term 2: (dipole I) x r, weighted by the induced dipoles along r ----
    const float uDotR  = uX*rx  + uY*ry  + uZ*rz;
    const float upDotR = upX*rx + upY*ry + upZ*rz;

    const float gti2 = bn2*(uDotR + upDotR);
#ifdef APPLY_SCALE
    const float gtri2 = (uDotR*psc5 + upDotR*dsc5);
#else
    const float gtri2 = psc5*(uDotR + upDotR);
#endif
    weightP = 0.5f*(gti2 - gtri2);

    tX += weightP*(dipY*rz - dipZ*ry);
    tY += weightP*(dipZ*rx - dipX*rz);
    tZ += weightP*(dipX*ry - dipY*rx);

    // --- term 3: r x (Q.r) --------------------------------------------------
    const float qrX = qXX*rx + qXY*ry + qXZ*rz;
    const float qrY = qXY*rx + qYY*ry + qYZ*rz;
    const float qrZ = qXZ*rx + qYZ*ry + qZZ*rz;

#ifdef APPLY_SCALE
    weightP = uDotR*psc7 + upDotR*dsc7 - bn3*(uDotR + upDotR);
#else
    weightP = psc7*(uDotR + upDotR) - bn3*(uDotR + upDotR);
#endif

    tX += weightP*(ry*qrZ - rz*qrY);
    tY += weightP*(rz*qrX - rx*qrZ);
    tZ += weightP*(rx*qrY - ry*qrX);

    // --- term 4: u x (Q.r) and r x (Q.u) ------------------------------------
    const float quX  = qXX*uX  + qXY*uY  + qXZ*uZ;
    const float quY  = qXY*uX  + qYY*uY  + qYZ*uZ;
    const float quZ  = qXZ*uX  + qYZ*uY  + qZZ*uZ;

    const float qupX = qXX*upX + qXY*upY + qXZ*upZ;
    const float qupY = qXY*upX + qYY*upY + qYZ*upZ;
    const float qupZ = qXZ*upX + qYZ*upY + qZZ*upZ;

    weightP = (bn2 - psc5);
#ifdef APPLY_SCALE
    weightD = (bn2 - dsc5);
#endif

    const float uxqrX  = uY*qrZ   - uZ*qrY;
    const float upxqrX = upY*qrZ  - upZ*qrY;
    const float rxquX  = ry*quZ   - rz*quY;
    const float rxqupX = ry*qupZ  - rz*qupY;
#ifdef APPLY_SCALE
    tX += weightP*(uxqrX + rxquX) + weightD*(upxqrX + rxqupX);
#else
    tX += weightP*(uxqrX + rxquX + upxqrX + rxqupX);
#endif

    const float uxqrY  = uZ*qrX   - uX*qrZ;
    const float upxqrY = upZ*qrX  - upX*qrZ;
    const float rxquY  = rz*quX   - rx*quZ;
    const float rxqupY = rz*qupX  - rx*qupZ;
#ifdef APPLY_SCALE
    tY += weightP*(uxqrY + rxquY) + weightD*(upxqrY + rxqupY);
#else
    tY += weightP*(uxqrY + rxquY + upxqrY + rxqupY);
#endif

    const float uxqrZ  = uX*qrY   - uY*qrX;
    const float upxqrZ = upX*qrY  - upY*qrX;
    const float rxquZ  = rx*quY   - ry*quX;
    const float rxqupZ = rx*qupY  - ry*qupX;
#ifdef APPLY_SCALE
    tZ += weightP*(uxqrZ + rxquZ) + weightD*(upxqrZ + rxqupZ);
#else
    tZ += weightP*(uxqrZ + rxquZ + upxqrZ + rxqupZ);
#endif

    // add the pair contribution to atom I's running torque
    atomI.torque[0] += tX;
    atomI.torque[1] += tY;
    atomI.torque[2] += tZ;
}
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
deleted
100644 → 0
View file @
352e2fc7
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
// Constant-memory copy of the base simulation parameters. It is filled by
// SetCalculateAmoebaCudaPmeFixedEFieldSim (cudaMemcpyToSymbol) and read by the
// kernels in this file (e.g. alphaEwald, the periodic box sizes, the cutoff).
static
__constant__
cudaGmxSimulation
cSim
;
// Constant-memory copy of the AMOEBA-specific simulation parameters, filled by
// the same Set function; the kernels read sqrtPi, pLabFrameDipole and the
// scale-index tables from it.
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
/**
 * Upload the host-side simulation parameter structs into this file's
 * constant-memory symbols (cSim and cAmoebaSim).
 *
 * @param amoebaGpu  context whose gpuContext->sim and amoebaSim are copied
 */
void SetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpuContext amoebaGpu )
{
    // base simulation parameters -> cSim
    cudaError_t status = cudaMemcpyToSymbol( cSim, &amoebaGpu->gpuContext->sim, sizeof(cudaGmxSimulation) );
    RTERROR( status, "SetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed" );

    // AMOEBA parameters -> cAmoebaSim
    status = cudaMemcpyToSymbol( cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation) );
    RTERROR( status, "SetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed" );
}
/**
 * Download this file's constant-memory symbols (cSim and cAmoebaSim) back into
 * the host-side simulation parameter structs.
 *
 * The failure messages now say "GetSim"; they previously said "SetSim", a
 * leftover from the matching Set function.
 *
 * @param amoebaGpu  context whose gpuContext->sim and amoebaSim are overwritten
 */
void GetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpuContext amoebaGpu )
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;

    // cSim -> base simulation parameters
    status = cudaMemcpyFromSymbol( &gpu->sim, cSim, sizeof(cudaGmxSimulation) );
    RTERROR( status, "GetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyFromSymbol: GetSim copy from cSim failed" );

    // cAmoebaSim -> AMOEBA parameters
    status = cudaMemcpyFromSymbol( &amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation) );
    RTERROR( status, "GetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyFromSymbol: GetSim copy from cAmoebaSim failed" );
}
/**
 * For every field component, sum the per-buffer contributions stored in
 * fieldIn, add the value from EFieldReciprocal and the dipole self term, and
 * store the total in fieldOut (overwriting it).
 *
 * @param fieldComponents   number of components to reduce; also the distance
 *                          between consecutive buffers in fieldIn
 * @param outputBuffers     number of buffers stacked in fieldIn
 * @param EFieldReciprocal  per-component value added to the sum
 * @param fieldIn           buffers to be summed
 * @param fieldOut          destination, one value per component
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kReducePmeEFieldPolar_kernel( unsigned int fieldComponents, unsigned int outputBuffers,
                                          float* EFieldReciprocal, float* fieldIn, float* fieldOut )
{
    // coefficient of the self term: (4/3) * alphaEwald^3 / sqrtPi
    const float selfCoefficient = (4.0f/3.0f)*(cSim.alphaEwald*cSim.alphaEwald*cSim.alphaEwald)/cAmoebaSim.sqrtPi;

    // grid-stride loop over the components
    for (unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
         index < fieldComponents;
         index += gridDim.x * blockDim.x)
    {
        // start from the reciprocal field plus the self term
        float sum = EFieldReciprocal[index] + selfCoefficient*cAmoebaSim.pLabFrameDipole[index];

        float* buffer          = fieldIn + index;
        unsigned int remaining = outputBuffers;

        // four buffers at a time ...
        for (; remaining >= 4; remaining -= 4)
        {
            sum    += buffer[0] + buffer[fieldComponents] + buffer[2 * fieldComponents] + buffer[3 * fieldComponents];
            buffer += fieldComponents * 4;
        }

        // ... then a pair ...
        if (remaining >= 2)
        {
            sum       += buffer[0] + buffer[fieldComponents];
            buffer    += fieldComponents * 2;
            remaining -= 2;
        }

        // ... then the last odd buffer
        if (remaining > 0)
        {
            sum += buffer[0];
        }

        fieldOut[index] = sum;
    }
}
/**
 * For every field component, sum the per-buffer contributions stored in
 * fieldIn, add the dipole self term, and ADD the total to the value already
 * in fieldOut.
 *
 * @param fieldComponents  number of components to reduce; also the distance
 *                         between consecutive buffers in fieldIn
 * @param outputBuffers    number of buffers stacked in fieldIn
 * @param fieldIn          buffers to be summed
 * @param fieldOut         accumulated in place, one value per component
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kReducePmeEField_kernel( unsigned int fieldComponents, unsigned int outputBuffers,
                                     float* fieldIn, float* fieldOut )
{
    // coefficient of the self term: (4/3) * alphaEwald^3 / sqrtPi
    const float selfCoefficient = (4.0f/3.0f)*(cSim.alphaEwald*cSim.alphaEwald*cSim.alphaEwald)/cAmoebaSim.sqrtPi;

    // grid-stride loop over the components
    for (unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
         index < fieldComponents;
         index += gridDim.x * blockDim.x)
    {
        // start from the self term
        float sum = selfCoefficient*cAmoebaSim.pLabFrameDipole[index];

        float* buffer          = fieldIn + index;
        unsigned int remaining = outputBuffers;

        // four buffers at a time ...
        for (; remaining >= 4; remaining -= 4)
        {
            sum    += buffer[0] + buffer[fieldComponents] + buffer[2 * fieldComponents] + buffer[3 * fieldComponents];
            buffer += fieldComponents * 4;
        }

        // ... then a pair ...
        if (remaining >= 2)
        {
            sum       += buffer[0] + buffer[fieldComponents];
            buffer    += fieldComponents * 2;
            remaining -= 2;
        }

        // ... then the last odd buffer
        if (remaining > 0)
        {
            sum += buffer[0];
        }

        // fieldOut is accumulated, not overwritten
        fieldOut[index] += sum;
    }
}
// reduce psWorkArray_3_1 -> EField
// reduce psWorkArray_3_2 -> EFieldPolar
/**
 * Launch the two reduction kernels that fold the work arrays into the
 * E-field arrays.
 *
 * Order matters: the first launch reads psE_Field as its reciprocal-field
 * input, and the second launch accumulates into psE_Field.
 *
 * @param amoebaGpu  context providing the launch geometry and device arrays
 */
static void kReducePmeDirectE_Fields( amoebaGpuContext amoebaGpu )
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // E_FieldPolar = E_Field (reciprocal) + reduced psWorkArray_3_2 + self term
    kReducePmeEFieldPolar_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
        gpu->sim.paddedNumberOfAtoms*3,
        gpu->sim.outputBuffers,
        amoebaGpu->psE_Field->_pDevData,
        amoebaGpu->psWorkArray_3_2->_pDevData,
        amoebaGpu->psE_FieldPolar->_pDevData );
    LAUNCHERROR( "kReducePmeE_Fields1" );

    // E_Field += reduced psWorkArray_3_1 + self term
    kReducePmeEField_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
        gpu->sim.paddedNumberOfAtoms*3,
        gpu->sim.outputBuffers,
        amoebaGpu->psWorkArray_3_1->_pDevData,
        amoebaGpu->psE_Field->_pDevData );
    LAUNCHERROR( "kReducePmeE_Fields2" );
}
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
#undef GK
#undef INCLUDE_FIXED_FIELD_BUFFERS
#define INCLUDE_FIXED_FIELD_BUFFERS
#include "kCalculateAmoebaCudaFixedFieldParticle.h"
#undef INCLUDE_FIXED_FIELD_BUFFERS
/**
 * Add atomJ's tempBuffer and tempBufferP (three components each) into the
 * corresponding buffers of atomI.
 *
 * @param atomI  particle whose temp buffers are incremented
 * @param atomJ  particle whose temp buffers are read
 */
__device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& atomJ ){

    for( int k = 0; k < 3; k++ ){
        atomI.tempBuffer[k] += atomJ.tempBuffer[k];
    }

    for( int k = 0; k < 3; k++ ){
        atomI.tempBufferP[k] += atomJ.tempBufferP[k];
    }
}
/**
 * Real-space part of the Ewald sum for the fixed-multipole field of one
 * particle pair.
 *
 * On return, for each Cartesian component k = 0..2:
 *   fields[k].x  field at atomI due to atomJ, using dscale
 *   fields[k].y  field at atomJ due to atomI, using dscale
 *   fields[k].z  field at atomI due to atomJ, using pscale
 *   fields[k].w  field at atomJ due to atomI, using pscale
 * All twelve values are set to zero when the minimum-image distance squared
 * is not <= cSim.nonbondedCutoffSqr.
 *
 * No check is made for coincident particles (r == 0).
 *
 * @param atomI   first particle (position, charge, dipole, quadrupole, damp, thole)
 * @param atomJ   second particle
 * @param dscale  scale factor multiplied into the damping factors for the x/y outputs
 * @param pscale  scale factor multiplied into the damping factors for the z/w outputs
 * @param fields  output, three float4 values
 */
__device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& atomI, FixedFieldParticle& atomJ,
                                                            float dscale, float pscale, float4 fields[3] ){

    // separation vector from I to J
    float rx = atomJ.x - atomI.x;
    float ry = atomJ.y - atomI.y;
    float rz = atomJ.z - atomI.z;

    // wrap each component with the periodic box
    rx -= floorf( rx*cSim.invPeriodicBoxSizeX + 0.5f )*cSim.periodicBoxSizeX;
    ry -= floorf( ry*cSim.invPeriodicBoxSizeY + 0.5f )*cSim.periodicBoxSizeY;
    rz -= floorf( rz*cSim.invPeriodicBoxSizeZ + 0.5f )*cSim.periodicBoxSizeZ;

    const float r2 = rx*rx + ry*ry + rz*rz;

    // outside the cutoff: report a zero field for both particles
    if( !(r2 <= cSim.nonbondedCutoffSqr) ){
        for( int k = 0; k < 3; k++ ){
            fields[k].x = 0.0f;
            fields[k].y = 0.0f;
            fields[k].z = 0.0f;
            fields[k].w = 0.0f;
        }
        return;
    }

    const float r = sqrtf( r2 );

    // error-function terms bn0..bn3, built by recurrence
    const float alphaR     = cSim.alphaEwald*r;
    const float bn0        = erfcf( alphaR )/r;
    const float twoAlphaSq = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
    float coeff            = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
    const float gauss      = expf( -(alphaR*alphaR) );

    coeff *= twoAlphaSq;
    const float bn1 = (bn0 + coeff*gauss)/r2;

    coeff *= twoAlphaSq;
    const float bn2 = (3.0f*bn1 + coeff*gauss)/r2;

    coeff *= twoAlphaSq;
    const float bn3 = (5.0f*bn2 + coeff*gauss)/r2;

    // Thole damping factors; they stay at 1 when either damp value is zero
    // or when the exponent is below -50
    float scale3 = 1.0f;
    float scale5 = 1.0f;
    float scale7 = 1.0f;
    float damp   = atomI.damp*atomJ.damp;
    if( damp != 0.0f ){
        float ratio = (r/damp);
        ratio       = ratio*ratio*ratio;

        float pgamma;
        if( atomI.thole < atomJ.thole ){
            pgamma = atomI.thole;
        } else {
            pgamma = atomJ.thole;
        }

        damp = -pgamma*ratio;
        if( damp > -50.0f ){
            const float expdamp = expf( damp );
            scale3 = 1.0f - expdamp;
            scale5 = 1.0f - expdamp*(1.0f - damp);
            scale7 = 1.0f - expdamp*(1.0f - damp + (0.6f*damp*damp));
        }
    }

    const float dsc3 = dscale*scale3;
    const float dsc5 = dscale*scale5;
    const float dsc7 = dscale*scale7;

    const float psc3 = pscale*scale3;
    const float psc5 = pscale*scale5;
    const float psc7 = pscale*scale7;

    const float r3 = (r*r2);
    const float r5 = (r3*r2);
    const float r7 = (r5*r2);

    const float drr3 = (1.0f - dsc3)/r3;
    const float drr5 = 3.0f*(1.0f - dsc5)/r5;
    const float drr7 = 15.0f*(1.0f - dsc7)/r7;

    const float prr3 = (1.0f - psc3)/r3;
    const float prr5 = 3.0f*(1.0f - psc5)/r5;
    const float prr7 = 15.0f*(1.0f - psc7)/r7;

    // projections of atom I's dipole and quadrupole onto r
    const float dipIr = atomI.labFrameDipole_X*rx + atomI.labFrameDipole_Y*ry + atomI.labFrameDipole_Z*rz;
    const float qIx   = atomI.labFrameQuadrupole_XX*rx + atomI.labFrameQuadrupole_XY*ry + atomI.labFrameQuadrupole_XZ*rz;
    const float qIy   = atomI.labFrameQuadrupole_XY*rx + atomI.labFrameQuadrupole_YY*ry + atomI.labFrameQuadrupole_YZ*rz;
    const float qIz   = atomI.labFrameQuadrupole_XZ*rx + atomI.labFrameQuadrupole_YZ*ry + atomI.labFrameQuadrupole_ZZ*rz;
    const float qIr   = qIx*rx + qIy*ry + qIz*rz;

    // same for atom J
    const float dipJr = atomJ.labFrameDipole_X*rx + atomJ.labFrameDipole_Y*ry + atomJ.labFrameDipole_Z*rz;
    const float qJx   = atomJ.labFrameQuadrupole_XX*rx + atomJ.labFrameQuadrupole_XY*ry + atomJ.labFrameQuadrupole_XZ*rz;
    const float qJy   = atomJ.labFrameQuadrupole_XY*rx + atomJ.labFrameQuadrupole_YY*ry + atomJ.labFrameQuadrupole_YZ*rz;
    const float qJz   = atomJ.labFrameQuadrupole_XZ*rx + atomJ.labFrameQuadrupole_YZ*ry + atomJ.labFrameQuadrupole_ZZ*rz;
    const float qJr   = qJx*rx + qJy*ry + qJz*rz;

    // bn-weighted fields: at I from J's multipoles, and at J from I's multipoles
    const float ewaldI0 = -rx*(bn1*atomJ.q - bn2*dipJr + bn3*qJr) - bn1*atomJ.labFrameDipole_X + 2.0f*bn2*qJx;
    const float ewaldI1 = -ry*(bn1*atomJ.q - bn2*dipJr + bn3*qJr) - bn1*atomJ.labFrameDipole_Y + 2.0f*bn2*qJy;
    const float ewaldI2 = -rz*(bn1*atomJ.q - bn2*dipJr + bn3*qJr) - bn1*atomJ.labFrameDipole_Z + 2.0f*bn2*qJz;

    const float ewaldJ0 = rx*(bn1*atomI.q + bn2*dipIr + bn3*qIr) - bn1*atomI.labFrameDipole_X - 2.0f*bn2*qIx;
    const float ewaldJ1 = ry*(bn1*atomI.q + bn2*dipIr + bn3*qIr) - bn1*atomI.labFrameDipole_Y - 2.0f*bn2*qIy;
    const float ewaldJ2 = rz*(bn1*atomI.q + bn2*dipIr + bn3*qIr) - bn1*atomI.labFrameDipole_Z - 2.0f*bn2*qIz;

    // same expressions with the dscale-based coefficients
    const float dirI0 = -rx*(drr3*atomJ.q - drr5*dipJr + drr7*qJr) - drr3*atomJ.labFrameDipole_X + 2.0f*drr5*qJx;
    const float dirI1 = -ry*(drr3*atomJ.q - drr5*dipJr + drr7*qJr) - drr3*atomJ.labFrameDipole_Y + 2.0f*drr5*qJy;
    const float dirI2 = -rz*(drr3*atomJ.q - drr5*dipJr + drr7*qJr) - drr3*atomJ.labFrameDipole_Z + 2.0f*drr5*qJz;

    const float dirJ0 = rx*(drr3*atomI.q + drr5*dipIr + drr7*qIr) - drr3*atomI.labFrameDipole_X - 2.0f*drr5*qIx;
    const float dirJ1 = ry*(drr3*atomI.q + drr5*dipIr + drr7*qIr) - drr3*atomI.labFrameDipole_Y - 2.0f*drr5*qIy;
    const float dirJ2 = rz*(drr3*atomI.q + drr5*dipIr + drr7*qIr) - drr3*atomI.labFrameDipole_Z - 2.0f*drr5*qIz;

    // same expressions with the pscale-based coefficients
    const float polI0 = -rx*(prr3*atomJ.q - prr5*dipJr + prr7*qJr) - prr3*atomJ.labFrameDipole_X + 2.0f*prr5*qJx;
    const float polI1 = -ry*(prr3*atomJ.q - prr5*dipJr + prr7*qJr) - prr3*atomJ.labFrameDipole_Y + 2.0f*prr5*qJy;
    const float polI2 = -rz*(prr3*atomJ.q - prr5*dipJr + prr7*qJr) - prr3*atomJ.labFrameDipole_Z + 2.0f*prr5*qJz;

    const float polJ0 = rx*(prr3*atomI.q + prr5*dipIr + prr7*qIr) - prr3*atomI.labFrameDipole_X - 2.0f*prr5*qIx;
    const float polJ1 = ry*(prr3*atomI.q + prr5*dipIr + prr7*qIr) - prr3*atomI.labFrameDipole_Y - 2.0f*prr5*qIy;
    const float polJ2 = rz*(prr3*atomI.q + prr5*dipIr + prr7*qIr) - prr3*atomI.labFrameDipole_Z - 2.0f*prr5*qIz;

    // report the field at each site due to this interaction
    fields[0].x = ewaldI0 - dirI0;
    fields[1].x = ewaldI1 - dirI1;
    fields[2].x = ewaldI2 - dirI2;

    fields[0].y = ewaldJ0 - dirJ0;
    fields[1].y = ewaldJ1 - dirJ1;
    fields[2].y = ewaldJ2 - dirJ2;

    fields[0].z = ewaldI0 - polI0;
    fields[1].z = ewaldI1 - polI1;
    fields[2].z = ewaldI2 - polI2;

    fields[0].w = ewaldJ0 - polJ0;
    fields[1].w = ewaldJ1 - polJ1;
    fields[2].w = ewaldJ2 - polJ2;
}
// Include two versions of the cutoff kernels: first without, then with USE_OUTPUT_BUFFER_PER_WARP defined.
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
/**---------------------------------------------------------------------------------------
Report whether a number is a nan or infinity
@param number number to test
@return 1 if number is nan or infinity; else return 0
--------------------------------------------------------------------------------------- */
/**---------------------------------------------------------------------------------------
Compute fixed electric field using PME
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
/**
 * Compute the direct-space fixed electric field: clear the work arrays,
 * launch the pair kernel (per-warp or default output-buffer variant), then
 * reduce the work arrays into the E-field arrays.
 *
 * The threads-per-block value is computed on the first call and reused on
 * every later call.
 *
 * @param amoebaGpu  amoebaGpu context
 */
static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
{
    static unsigned int threadsPerBlock = 0;

    gpuContext gpu = amoebaGpu->gpuContext;

    kClearFields_3( amoebaGpu, 2 );

    // first call only: pick the block size, capped per compute capability
    if( threadsPerBlock == 0 ){
        const unsigned int maxThreads = (gpu->sm_version >= SM_20) ? 384
                                      : (gpu->sm_version >= SM_12) ? 192
                                      : 64;
        threadsPerBlock = std::min( getThreadsPerBlock( amoebaGpu, sizeof(FixedFieldParticle), gpu->sharedMemoryPerBlock ),
                                    maxThreads );
    }

    // one FixedFieldParticle of dynamic shared memory per thread
    if( gpu->bOutputBufferPerWarp ){
        kCalculateAmoebaPmeDirectFixedE_FieldCutoffByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
            gpu->sim.pInteractingWorkUnit,
            amoebaGpu->psWorkArray_3_1->_pDevData,
            amoebaGpu->psWorkArray_3_2->_pDevData );
    } else {
        kCalculateAmoebaPmeDirectFixedE_FieldCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
            gpu->sim.pInteractingWorkUnit,
            amoebaGpu->psWorkArray_3_1->_pDevData,
            amoebaGpu->psWorkArray_3_2->_pDevData );
    }
    LAUNCHERROR( "kCalculateAmoebaPmeDirectFixedE_Field_kernel" );

    kReducePmeDirectE_Fields( amoebaGpu );
}
/**
 * Entry point for the PME fixed electric field: runs
 * kCalculateAmoebaPMEFixedMultipoles first, then the direct-space field
 * computation defined in this file.
 *
 * @param amoebaGpu  amoebaGpu context
 */
void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{
    kCalculateAmoebaPMEFixedMultipoles( amoebaGpu );
    cudaComputeAmoebaPmeDirectFixedEField( amoebaGpu );
}
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
deleted
100644 → 0
View file @
352e2fc7
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
/**
 * Direct-space fixed E-field kernel. Each group of GRID consecutive threads
 * walks its share of the work units; a work unit decodes to a pair of cell
 * origins (x, y) plus an exclusion flag.
 *
 *  - x == y: every thread interacts its own atom (x + tgx) with all GRID atoms
 *    of the same cell and accumulates only the field at its own atom.
 *  - x != y: the y-cell atoms are staged in shared memory; each thread
 *    accumulates the field at its own atom in registers and the field at the
 *    y atoms in the shared-memory copies.
 *
 * Contributions involving an atom index >= cSim.atoms (and, on the diagonal,
 * an atom paired with itself) are replaced by zero.
 *
 * NOTE(review): the shared-memory accumulation and the tempBuffer reduction
 * contain no explicit synchronisation; they appear to rely on the GRID threads
 * of a group executing in lock step -- confirm before changing statement order.
 *
 * @param workUnit           encoded work units, decoded with decodeCell
 * @param outputEField       output buffers, written via load3dArray*
 * @param outputEFieldPolar  output buffers for the polar field
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(384, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(192, 1)
#else
__launch_bounds__(64, 1)
#endif
void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
                            unsigned int* workUnit,
                            float* outputEField,
                            float* outputEFieldPolar ){

    extern __shared__ FixedFieldParticle sA[];

    // split the work units evenly across all thread groups in the grid
    unsigned int warpCount = gridDim.x*blockDim.x/GRID;
    unsigned int warp      = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int unitCount = cSim.pInteractionCount[0];
    unsigned int pos       = warp*unitCount/warpCount;
    unsigned int end       = (warp+1)*unitCount/warpCount;
    unsigned int cachedY   = 0xFFFFFFFF;   // y cell currently staged in shared memory

    while (pos < end)
    {
        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;
        float dScaleValue;
        float pScaleValue;
        int  dScaleMask;
        int2 pScaleMask;

        // extract cell coordinates
        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx = threadIdx.x & (GRID - 1);   // lane within the group
        unsigned int tbx = threadIdx.x - tgx;          // first thread of the group
        unsigned int tj  = tgx;
        FixedFieldParticle* psA = &sA[tbx];

        // this thread's own atom, kept in a local copy
        unsigned int atomI = x + tgx;
        FixedFieldParticle localParticle;
        loadFixedFieldShared( &localParticle, atomI );

        float eFieldI[3];
        float eFieldPolarI[3];
        for (int k = 0; k < 3; k++)
        {
            eFieldI[k] = 0.0f;
        }
        for (int k = 0; k < 3; k++)
        {
            eFieldPolarI[k] = 0.0f;
        }

        if (x == y)
        {
            // diagonal cell: stage this thread's atom in shared memory as well
            loadFixedFieldShared( &(sA[threadIdx.x]), atomI );

            if (bExclusionFlag)
            {
                unsigned int xi   = x >> GRIDBITS;
                unsigned int cell = xi + xi*cSim.paddedNumberOfAtoms/GRID - xi*(xi+1)/2;
                dScaleMask = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                pScaleMask = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            }
            else
            {
                dScaleValue = pScaleValue = 1.0f;
            }

            for (unsigned int j = 0; j < GRID; j++)
            {
                if (bExclusionFlag)
                {
                    getMaskedDScaleFactor( j, dScaleMask, &dScaleValue );
                    getMaskedPScaleFactor( j, pScaleMask, &pScaleValue );
                }

                float4 pairField[3];
                calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[j], dScaleValue, pScaleValue, pairField );

                // nan*0.0 = nan, not 0.0, so the self pair and padded atoms
                // are dropped by selecting 0 instead of multiplying by a mask
                const bool skip = (atomI == (y + j)) || (atomI >= cSim.atoms) || ((y + j) >= cSim.atoms);

                // field at atomI due to atomJ's charge/dipole/quadrupole
                for (int k = 0; k < 3; k++)
                {
                    eFieldI[k] += skip ? 0.0f : pairField[k].x;
                }
                for (int k = 0; k < 3; k++)
                {
                    eFieldPolarI[k] += skip ? 0.0f : pairField[k].z;
                }
            }

            // Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, eFieldI,      outputEField );
            load3dArrayBufferPerWarp( offset, eFieldPolarI, outputEFieldPolar );
#else
            unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( offset, eFieldI,      outputEField );
            load3dArray( offset, eFieldPolarI, outputEFieldPolar );
#endif
        }
        else
        {
            // off-diagonal cell: (re)load the y atoms only when y has changed
            if (cachedY != y)
            {
                loadFixedFieldShared( &(sA[threadIdx.x]), (y + tgx) );
            }

            unsigned int flags = cSim.pInteractionFlag[pos];
            if (flags != 0)   // flags == 0: no interactions in this block
            {
                // zero shared fields
                zeroFixedFieldParticleSharedField( &(sA[threadIdx.x]) );

                if (bExclusionFlag)
                {
                    unsigned int xi   = x >> GRIDBITS;
                    unsigned int yi   = y >> GRIDBITS;
                    unsigned int cell = xi + yi*cSim.paddedNumberOfAtoms/GRID - yi*(yi+1)/2;
                    dScaleMask = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                    pScaleMask = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                }
                else
                {
                    dScaleValue = pScaleValue = 1.0f;
                }

                for (unsigned int j = 0; j < GRID; j++)
                {
                    if ((flags&(1<<j)) != 0)
                    {
                        // all flags set: every thread takes a different j (rotating index tj);
                        // otherwise every thread works on the same atom j
                        unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;

                        if (bExclusionFlag)
                        {
                            getMaskedDScaleFactor( jIdx, dScaleMask, &dScaleValue );
                            getMaskedPScaleFactor( jIdx, pScaleMask, &pScaleValue );
                        }

                        float4 pairField[3];
                        calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[jIdx], dScaleValue, pScaleValue, pairField );

                        const bool outOfBounds = (atomI >= cSim.atoms) || ((y + jIdx) >= cSim.atoms);

                        // field at atomI due to atomJ's charge/dipole/quadrupole
                        for (int k = 0; k < 3; k++)
                        {
                            eFieldI[k] += outOfBounds ? 0.0f : pairField[k].x;
                        }
                        for (int k = 0; k < 3; k++)
                        {
                            eFieldPolarI[k] += outOfBounds ? 0.0f : pairField[k].z;
                        }

                        if (flags == 0xFFFFFFFF)
                        {
                            // field at atomJ due to atomI's charge/dipole/quadrupole
                            for (int k = 0; k < 3; k++)
                            {
                                psA[jIdx].eField[k] += outOfBounds ? 0.0f : pairField[k].y;
                            }
                            for (int k = 0; k < 3; k++)
                            {
                                psA[jIdx].eFieldP[k] += outOfBounds ? 0.0f : pairField[k].w;
                            }
                        }
                        else
                        {
                            // every thread targets the same atom j: park the
                            // contribution in this thread's temp buffers ...
                            for (int k = 0; k < 3; k++)
                            {
                                sA[threadIdx.x].tempBuffer[k] = outOfBounds ? 0.0f : pairField[k].y;
                            }
                            for (int k = 0; k < 3; k++)
                            {
                                sA[threadIdx.x].tempBufferP[k] = outOfBounds ? 0.0f : pairField[k].w;
                            }

                            // ... fold them pairwise across the group ...
                            if (tgx % 2 == 0)
                            {
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+1] );
                            }
                            if (tgx % 4 == 0)
                            {
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+2] );
                            }
                            if (tgx % 8 == 0)
                            {
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+4] );
                            }
                            if (tgx % 16 == 0)
                            {
                                sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+8] );
                            }

                            // ... and let lane 0 add the two remaining partial sums to atom j
                            if (tgx == 0)
                            {
                                for (int k = 0; k < 3; k++)
                                {
                                    psA[jIdx].eField[k] += sA[threadIdx.x].tempBuffer[k] + sA[threadIdx.x+16].tempBuffer[k];
                                }
                                for (int k = 0; k < 3; k++)
                                {
                                    psA[jIdx].eFieldP[k] += sA[threadIdx.x].tempBufferP[k] + sA[threadIdx.x+16].tempBufferP[k];
                                }
                            }
                        }
                    }

                    // advance the rotating index on every iteration
                    tj = (tj + 1) & (GRID - 1);
                }

                // Write results: field at the x atoms, then field at the y atoms
#ifdef USE_OUTPUT_BUFFER_PER_WARP
                unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
                load3dArrayBufferPerWarp( offset, eFieldI,      outputEField );
                load3dArrayBufferPerWarp( offset, eFieldPolarI, outputEFieldPolar );

                offset = 3*(y + tgx + warp*cSim.paddedNumberOfAtoms);
                load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eField,  outputEField );
                load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#else
                unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
                load3dArray( offset, eFieldI,      outputEField );
                load3dArray( offset, eFieldPolarI, outputEFieldPolar );

                offset = 3*(y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
                load3dArray( offset, sA[threadIdx.x].eField,  outputEField );
                load3dArray( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#endif
            }

            cachedY = y;
        }

        pos++;
    }
}
plugins/amoeba/platforms/cuda-old/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
deleted
100644 → 0
View file @
352e2fc7
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include "openmm/OpenMMException.h"
#include <stdio.h>
using
namespace
std
;
// File-local (static) constant-memory copy of the base simulation parameters.
// Written from the host by SetCalculateAmoebaCudaPmeMutualInducedFieldSim() and
// read by the device code in this file (box sizes, cutoff, alphaEwald, atoms, ...).
static
__constant__
cudaGmxSimulation
cSim
;
// File-local (static) constant-memory copy of the AMOEBA-specific simulation
// parameters (e.g. sqrtPi), written by the same Set...Sim() function.
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
/**
 * Upload the host-side simulation parameter structs into this file's
 * constant-memory symbols (cSim and cAmoebaSim).
 *
 * @param amoebaGpu  AMOEBA GPU context; its gpuContext->sim and amoebaSim
 *                   members are the copy sources
 *
 * Each copy status is handed to RTERROR together with a message naming the failed copy.
 */
void SetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // base simulation parameters -> cSim
    cudaError_t status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");

    // AMOEBA-specific parameters -> cAmoebaSim
    status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
/**
 * Download this file's constant-memory symbols (cSim and cAmoebaSim) back into
 * the host-side simulation parameter structs.
 *
 * @param amoebaGpu  AMOEBA GPU context; its gpuContext->sim and amoebaSim
 *                   members are overwritten with the device values
 *
 * Each copy status is handed to RTERROR together with a message naming the failed copy.
 */
void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext amoebaGpu)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // cSim -> base simulation parameters
    cudaError_t status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");

    // cAmoebaSim -> AMOEBA-specific parameters
    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaCudaPmeMutualInducedFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
#undef INCLUDE_MI_FIELD_BUFFERS
#define INCLUDE_MI_FIELD_BUFFERS
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#ifdef INCLUDE_MI_FIELD_BUFFERS
/**
 * Add the first three entries of atomJ's temporary buffers into atomI's.
 *
 * @param atomI  particle whose tempBuffer / tempBufferP are incremented
 * @param atomJ  particle whose tempBuffer / tempBufferP supply the increments
 */
__device__ void sumTempBuffer(MutualInducedParticle& atomI, MutualInducedParticle& atomJ)
{
    // tempBuffer components first, then tempBufferP, matching the original update order
    for (int component = 0; component < 3; component++) {
        atomI.tempBuffer[component] += atomJ.tempBuffer[component];
    }
    for (int component = 0; component < 3; component++) {
        atomI.tempBufferP[component] += atomJ.tempBufferP[component];
    }
}
#endif
/**
 * Compute the separation vector and the two scalar prefactors used by the
 * mutual-induced-field pair kernels (real-space portion of the Ewald summation).
 *
 * @param atomI       first particle; x, y, z, damp and thole are read
 * @param atomJ       second particle; x, y, z, damp and thole are read
 * @param uscale      factor multiplying the damping scale terms (scale3, scale5)
 * @param delta       output: x/y/z receive atomJ - atomI after the periodic-box wrap;
 *                    w receives (rr3 - bn1), or 0 when the pair is outside the cutoff
 * @param preFactor2  output: receives (bn2 - rr5), or 0 when the pair is outside the cutoff
 */
__device__ void setupMutualInducedFieldPairIxn_kernel(const MutualInducedParticle& atomI,
                                                      const MutualInducedParticle& atomJ,
                                                      const float uscale,
                                                      float4* delta,
                                                      float* preFactor2)
{
    // separation vector for the real-space portion of the Ewald summation
    float dx = atomJ.x - atomI.x;
    float dy = atomJ.y - atomI.y;
    float dz = atomJ.z - atomI.z;

    // periodic boundary conditions
    dx -= floorf(dx*cSim.invPeriodicBoxSizeX + 0.5f)*cSim.periodicBoxSizeX;
    dy -= floorf(dy*cSim.invPeriodicBoxSizeY + 0.5f)*cSim.periodicBoxSizeY;
    dz -= floorf(dz*cSim.invPeriodicBoxSizeZ + 0.5f)*cSim.periodicBoxSizeZ;

    delta->x = dx;
    delta->y = dy;
    delta->z = dz;

    const float distSq = (dx*dx) + (dy*dy) + (dz*dz);

    // outside the cutoff (or distSq is NaN): both prefactors are zero
    if (!(distSq <= cSim.nonbondedCutoffSqr)) {
        *preFactor2 = 0.0f;
        delta->w    = 0.0f;
        return;
    }

    const float dist = sqrtf(distSq);

    // error-function damping terms
    const float alphaR      = cSim.alphaEwald*dist;
    const float bn0         = erfcf(alphaR)/dist;
    const float twoAlphaSq  = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
    float       alphaSeries = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
    const float gaussian    = expf(-(alphaR*alphaR));

    alphaSeries *= twoAlphaSq;
    const float bn1 = (bn0 + alphaSeries*gaussian)/distSq;

    alphaSeries *= twoAlphaSq;
    const float bn2 = (3.0f*bn1 + alphaSeries*gaussian)/distSq;

    // damping scale factors; they stay at 1 when either damp parameter is zero
    // or when the exponent is at or below -50
    float scale3 = 1.0f;
    float scale5 = 1.0f;
    float damp   = atomI.damp*atomJ.damp;
    if (damp != 0.0f) {
        float ratio = (dist/damp);
        ratio       = ratio*ratio*ratio;

        // use the smaller of the two thole values
        float pgamma;
        if (atomI.thole < atomJ.thole) {
            pgamma = atomI.thole;
        } else {
            pgamma = atomJ.thole;
        }

        damp = -pgamma*ratio;
        if (damp > -50.0f) {
            const float expDamp = expf(damp);
            scale3 = 1.0f - expDamp;
            scale5 = 1.0f - expDamp*(1.0f - damp);
        }
    }

    const float dsc3 = uscale*scale3;
    const float dsc5 = uscale*scale5;

    const float dist3 = (dist*distSq);
    const float dist5 = (dist3*distSq);
    const float rr3   = (1.0f - dsc3)/dist3;
    const float rr5   = 3.0f*(1.0f - dsc5)/dist5;

    delta->w    = rr3 - bn1;
    *preFactor2 = bn2 - rr5;
}
/**
 * Accumulate into fieldSum the field contribution of one induced dipole.
 *
 * @param inducedDipole  3-component induced dipole
 * @param delta          x/y/z: separation vector; w: scalar prefactor multiplying the dipole
 * @param preFactor2     scalar prefactor multiplying (dipole . separation) * separation
 * @param fieldSum       3-component accumulator; each component is incremented
 */
__device__ void calculateMutualInducedFieldPairIxn_kernel(const float inducedDipole[3],
                                                          const float4 delta,
                                                          const float preFactor2,
                                                          float fieldSum[3])
{
    // projection of the dipole on the separation vector, scaled by preFactor2
    const float dipoleDotDelta = inducedDipole[0]*delta.x + inducedDipole[1]*delta.y + inducedDipole[2]*delta.z;
    const float radialFactor   = preFactor2*dipoleDotDelta;

    fieldSum[0] += radialFactor*delta.x + delta.w*inducedDipole[0];
    fieldSum[1] += radialFactor*delta.y + delta.w*inducedDipole[1];
    fieldSum[2] += radialFactor*delta.z + delta.w*inducedDipole[2];
}
/**
 * Compute the field contribution of one induced dipole and store it in fieldSum,
 * overwriting whatever fieldSum held (the non-accumulating variant).
 *
 * @param inducedDipole  3-component induced dipole
 * @param delta          x/y/z: separation vector; w: scalar prefactor multiplying the dipole
 * @param preFactor2     scalar prefactor multiplying (dipole . separation) * separation
 * @param fieldSum       3-component output; each component is assigned
 */
__device__ void calculateMutualInducedFieldPairIxnNoAdd_kernel(const float inducedDipole[3],
                                                               const float4 delta,
                                                               const float preFactor2,
                                                               float fieldSum[3])
{
    // projection of the dipole on the separation vector, scaled by preFactor2
    const float dipoleDotDelta = inducedDipole[0]*delta.x + inducedDipole[1]*delta.y + inducedDipole[2]*delta.z;
    const float radialFactor   = preFactor2*dipoleDotDelta;

    fieldSum[0] = radialFactor*delta.x + delta.w*inducedDipole[0];
    fieldSum[1] = radialFactor*delta.y + delta.w*inducedDipole[1];
    fieldSum[2] = radialFactor*delta.z + delta.w*inducedDipole[2];
}
/**
 * Compute, for one atom pair, the four mutual-induced-field contributions of the
 * real-space portion of the Ewald summation.
 *
 * @param atomI   first particle; x, y, z, damp, thole, inducedDipole and inducedDipolePolar are read
 * @param atomJ   second particle; same members are read
 * @param uscale  factor multiplying the damping scale terms (scale3, scale5)
 * @param fields  output, indexed by Cartesian component (0..2); for each component
 *                  .x is built from atomJ.inducedDipole,
 *                  .y is built from atomI.inducedDipole,
 *                  .z is built from atomJ.inducedDipolePolar,
 *                  .w is built from atomI.inducedDipolePolar.
 *                All twelve values are set to 0 when the pair is outside the cutoff.
 */
__device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel(MutualInducedParticle& atomI,
                                                                   MutualInducedParticle& atomJ,
                                                                   float uscale,
                                                                   float4 fields[3])
{
    // separation vector for the real-space portion of the Ewald summation
    float dx = atomJ.x - atomI.x;
    float dy = atomJ.y - atomI.y;
    float dz = atomJ.z - atomI.z;

    // periodic boundary conditions
    dx -= floorf(dx*cSim.invPeriodicBoxSizeX + 0.5f)*cSim.periodicBoxSizeX;
    dy -= floorf(dy*cSim.invPeriodicBoxSizeY + 0.5f)*cSim.periodicBoxSizeY;
    dz -= floorf(dz*cSim.invPeriodicBoxSizeZ + 0.5f)*cSim.periodicBoxSizeZ;

    const float distSq = dx*dx + dy*dy + dz*dz;

    // outside the cutoff (or distSq is NaN): no contribution
    if (!(distSq <= cSim.nonbondedCutoffSqr)) {
        for (int component = 0; component < 3; component++) {
            fields[component].x = 0.0f;
            fields[component].y = 0.0f;
            fields[component].z = 0.0f;
            fields[component].w = 0.0f;
        }
        return;
    }

    const float dist = sqrtf(distSq);

    // error-function damping terms
    const float alphaR      = cSim.alphaEwald*dist;
    const float bn0         = erfcf(alphaR)/dist;
    const float twoAlphaSq  = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
    float       alphaSeries = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
    const float gaussian    = expf(-(alphaR*alphaR));

    alphaSeries *= twoAlphaSq;
    const float bn1 = (bn0 + alphaSeries*gaussian)/distSq;

    alphaSeries *= twoAlphaSq;
    const float bn2 = (3.0f*bn1 + alphaSeries*gaussian)/distSq;

    // damping scale factors; they stay at 1 when either damp parameter is zero
    // or when the exponent is at or below -50
    float scale3 = 1.0f;
    float scale5 = 1.0f;
    float damp   = atomI.damp*atomJ.damp;
    if (damp != 0.0f) {
        float ratio = (dist/damp);
        ratio       = ratio*ratio*ratio;

        // use the smaller of the two thole values
        float pgamma;
        if (atomI.thole < atomJ.thole) {
            pgamma = atomI.thole;
        } else {
            pgamma = atomJ.thole;
        }

        damp = -pgamma*ratio;
        if (damp > -50.0f) {
            const float expDamp = expf(damp);
            scale3 = 1.0f - expDamp;
            scale5 = 1.0f - expDamp*(1.0f - damp);
        }
    }

    const float dsc3 = uscale*scale3;
    const float dsc5 = uscale*scale5;

    const float dist3 = (dist*distSq);
    const float dist5 = (dist3*distSq);
    const float rr3   = (1.0f - dsc3)/dist3;
    const float rr5   = 3.0f*(1.0f - dsc5)/dist5;

    const float dipoleFactor = rr3 - bn1;   // multiplies the dipole itself
    const float radialFactor = bn2 - rr5;   // multiplies (dipole . r) r

    float projection;

    // .x : from atomJ.inducedDipole
    const float dukr = atomJ.inducedDipole[0]*dx + atomJ.inducedDipole[1]*dy + atomJ.inducedDipole[2]*dz;
    projection  = radialFactor*dukr;
    fields[0].x = projection*dx + dipoleFactor*atomJ.inducedDipole[0];
    fields[1].x = projection*dy + dipoleFactor*atomJ.inducedDipole[1];
    fields[2].x = projection*dz + dipoleFactor*atomJ.inducedDipole[2];

    // .y : from atomI.inducedDipole
    const float duir = atomI.inducedDipole[0]*dx + atomI.inducedDipole[1]*dy + atomI.inducedDipole[2]*dz;
    projection  = radialFactor*duir;
    fields[0].y = projection*dx + dipoleFactor*atomI.inducedDipole[0];
    fields[1].y = projection*dy + dipoleFactor*atomI.inducedDipole[1];
    fields[2].y = projection*dz + dipoleFactor*atomI.inducedDipole[2];

    // .z : from atomJ.inducedDipolePolar
    const float pukr = atomJ.inducedDipolePolar[0]*dx + atomJ.inducedDipolePolar[1]*dy + atomJ.inducedDipolePolar[2]*dz;
    projection  = radialFactor*pukr;
    fields[0].z = projection*dx + dipoleFactor*atomJ.inducedDipolePolar[0];
    fields[1].z = projection*dy + dipoleFactor*atomJ.inducedDipolePolar[1];
    fields[2].z = projection*dz + dipoleFactor*atomJ.inducedDipolePolar[2];

    // .w : from atomI.inducedDipolePolar
    const float puir = atomI.inducedDipolePolar[0]*dx + atomI.inducedDipolePolar[1]*dy + atomI.inducedDipolePolar[2]*dz;
    projection  = radialFactor*puir;
    fields[0].w = projection*dx + dipoleFactor*atomI.inducedDipolePolar[0];
    fields[1].w = projection*dy + dipoleFactor*atomI.inducedDipolePolar[1];
    fields[2].w = projection*dz + dipoleFactor*atomI.inducedDipolePolar[2];
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateAmoebaCudaPmeMutualInducedField.h"
/**
 * Scale both fixed E-field arrays in place by the polarizability array,
 * element by element, over the first 3*cSim.atoms entries (grid-stride loop).
 *
 * @param numberOfAtoms     not used by the body; the loop bound comes from cSim.atoms
 * @param fixedEField       in/out: multiplied by polarizability
 * @param fixedEFieldPolar  in/out: multiplied by polarizability
 * @param polarizability    per-entry multipliers, indexed like the field arrays
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kInitializeMutualInducedField_kernel(int numberOfAtoms, float* fixedEField, float* fixedEFieldPolar, float* polarizability)
{
    for (int pos = blockIdx.x*blockDim.x + threadIdx.x; pos < 3*cSim.atoms; pos += blockDim.x*gridDim.x) {
        fixedEField[pos]      *= polarizability[pos];
        fixedEFieldPolar[pos] *= polarizability[pos];
    }
}
/**
 * Sum each of two float arrays across the block, take the larger of the two
 * sums, and write 48.033324f * sqrt(largerSum / (numberOfEntries/3)) to epsilon[0].
 *
 * Each thread starts at index threadIdx.x (blockIdx is not used), so the result
 * is only meaningful for a single-block launch; the caller in this file launches
 * exactly one block. Dynamic shared memory must hold one float2 per thread.
 *
 * @param numberOfEntries  number of entries in each input array
 * @param arrayOfDeltas1   first input array
 * @param arrayOfDeltas2   second input array
 * @param epsilon          output; only epsilon[0] is written (by thread 0)
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDeltas1, float* arrayOfDeltas2, float* epsilon)
{
    extern __shared__ float2 delta[];

    // per-thread partial sums over a strided slice of both arrays
    float partialSum1 = 0.0f;
    float partialSum2 = 0.0f;
    for (unsigned int pos = threadIdx.x; pos < numberOfEntries; pos += blockDim.x*gridDim.x) {
        partialSum1 += arrayOfDeltas1[pos];
        partialSum2 += arrayOfDeltas2[pos];
    }
    delta[threadIdx.x].x = partialSum1;
    delta[threadIdx.x].y = partialSum2;
    __syncthreads();

    // pairwise tree reduction in shared memory; the total ends up in delta[0]
    for (int offset = 1; offset < blockDim.x; offset *= 2) {
        if (threadIdx.x + offset < blockDim.x && (threadIdx.x & (2*offset - 1)) == 0) {
            delta[threadIdx.x].x += delta[threadIdx.x + offset].x;
            delta[threadIdx.x].y += delta[threadIdx.x + offset].y;
        }
        __syncthreads();
    }

    // thread 0 publishes the scaled value for the larger of the two sums
    if (threadIdx.x == 0) {
        const float largerSum = delta[0].x > delta[0].y ? delta[0].x : delta[0].y;
        epsilon[0] = 48.033324f*sqrtf(largerSum/((float) (numberOfEntries/3)));
    }
}
/**
 * One successive-over-relaxation update of both induced-dipole arrays
 * (grid-stride loop over the first 3*cSim.atoms entries).
 *
 * For each entry: the self term (4/3)*alphaEwald^3/sqrtPi times the previous dipole is
 * added to the matrix product, a new dipole estimate fixedEField + polarizability*product
 * is formed, and the stored dipole is moved 0.55 of the way from its previous value
 * toward that estimate.
 *
 * @param polarizability  per-entry polarizabilities
 * @param inducedDipole   in/out: induced dipoles
 * @param inducedDipoleP  in/out: polar induced dipoles
 * @param fixedEField     fixed-field term added to the estimate
 * @param fixedEFieldP    fixed-field term added to the polar estimate
 * @param matrixProduct   in: matrix product; out: squared change of inducedDipole
 * @param matrixProductP  in: polar matrix product; out: squared change of inducedDipoleP
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
static void kSorUpdateMutualInducedField_kernel(float* polarizability,
                                                float* inducedDipole, float* inducedDipoleP,
                                                float* fixedEField, float* fixedEFieldP,
                                                float* matrixProduct, float* matrixProductP)
{
    const float polarSOR = 0.55f;
    const float term     = (4.0f/3.0f)*(cSim.alphaEwald*cSim.alphaEwald*cSim.alphaEwald)/cAmoebaSim.sqrtPi;

    for (int pos = blockIdx.x*blockDim.x + threadIdx.x; pos < 3*cSim.atoms; pos += blockDim.x*gridDim.x) {

        const float previousDipole  = inducedDipole[pos];
        const float previousDipoleP = inducedDipoleP[pos];

        // add self terms to fields
        float mProd  = matrixProduct[pos];
        float mProdP = matrixProductP[pos];
        mProd       += term*previousDipole;
        mProdP      += term*previousDipoleP;

        const float estimate  = fixedEField[pos]  + polarizability[pos]*mProd;
        const float estimateP = fixedEFieldP[pos] + polarizability[pos]*mProdP;

        // relax toward the new estimate
        const float updated  = previousDipole  + polarSOR*(estimate  - previousDipole);
        const float updatedP = previousDipoleP + polarSOR*(estimateP - previousDipoleP);
        inducedDipole[pos]   = updated;
        inducedDipoleP[pos]  = updatedP;

        // squared change, consumed by the convergence reduction
        matrixProduct[pos]   = (updated  - previousDipole)*(updated  - previousDipole);
        matrixProductP[pos]  = (updatedP - previousDipoleP)*(updatedP - previousDipoleP);
    }
}
/**
 * Launch kReduceFields_kernel twice: once to reduce psWorkArray_3_1 into
 * outputArray and once to reduce psWorkArray_3_2 into outputPolarArray.
 *
 * @param amoebaGpu         AMOEBA GPU context supplying the work arrays and launch configuration
 * @param outputArray       destination stream for the psWorkArray_3_1 reduction
 * @param outputPolarArray  destination stream for the psWorkArray_3_2 reduction
 */
static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<float>* outputArray, CUDAStream<float>* outputPolarArray)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // psWorkArray_3_1 -> outputArray
    kReduceFields_kernel<<< gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block >>>(
        gpu->sim.paddedNumberOfAtoms*3,
        gpu->sim.outputBuffers,
        amoebaGpu->psWorkArray_3_1->_pDevData,
        outputArray->_pDevData,
        0);
    LAUNCHERROR("kReducePmeMI_Fields1");

    // psWorkArray_3_2 -> outputPolarArray
    kReduceFields_kernel<<< gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block >>>(
        gpu->sim.paddedNumberOfAtoms*3,
        gpu->sim.outputBuffers,
        amoebaGpu->psWorkArray_3_2->_pDevData,
        outputPolarArray->_pDevData,
        0);
    LAUNCHERROR("kReducePmeMI_Fields2");
}
/**---------------------------------------------------------------------------------------

   Compute the PME mutual induced field: clear the work arrays, run the cutoff
   pair kernel into psWorkArray_3_1 / psWorkArray_3_2, then reduce those arrays
   into the two output streams.

   The threads-per-block value is computed on the first call and cached in a
   function-local static for all later calls.

   @param amoebaGpu         amoebaGpu context
   @param outputArray       receives the reduced field
   @param outputPolarArray  receives the reduced polar field

   --------------------------------------------------------------------------------------- */
static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply(amoebaGpuContext amoebaGpu,
                                                                 CUDAStream<float>* outputArray,
                                                                 CUDAStream<float>* outputPolarArray)
{
    static unsigned int threadsPerBlock = 0;

    gpuContext gpu = amoebaGpu->gpuContext;

    kClearFields_3(amoebaGpu, 2);

    // on first pass, set threads/block: capped per SM version and by shared-memory use
    if (threadsPerBlock == 0) {
        unsigned int maxThreads = 64;
        if (gpu->sm_version >= SM_20) {
            maxThreads = 384;
        } else if (gpu->sm_version >= SM_12) {
            maxThreads = 128;
        }
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(MutualInducedParticle), gpu->sharedMemoryPerBlock), maxThreads);
    }

    // one MutualInducedParticle of dynamic shared memory per thread
    if (gpu->bOutputBufferPerWarp) {
        kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel<<< gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock >>>(
            gpu->sim.pInteractingWorkUnit,
            amoebaGpu->psWorkArray_3_1->_pDevData,
            amoebaGpu->psWorkArray_3_2->_pDevData);
    } else {
        kCalculateAmoebaPmeMutualInducedFieldCutoff_kernel<<< gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock >>>(
            gpu->sim.pInteractingWorkUnit,
            amoebaGpu->psWorkArray_3_1->_pDevData,
            amoebaGpu->psWorkArray_3_2->_pDevData);
    }
    LAUNCHERROR("kCalculateAmoebaPmeMutualInducedField");

    kReduceMutualInducedFields(amoebaGpu, outputArray, outputPolarArray);
}
/**---------------------------------------------------------------------------------------

   Compute the PME mutual induced dipoles by successive over-relaxation (SOR).

   Steps:
     1. scale E_Field / E_FieldPolar in place by the polarizability;
     2. copy the scaled fields into InducedDipole / InducedDipolePolar as the initial guess;
     3. if amoebaSim.polarizationType is nonzero (direct polarization, per the original
        comment), mark the calculation done and converged, compute the PME induced dipole
        field once and return;
     4. otherwise iterate { matrix multiply, PME induced dipole field, SOR update,
        epsilon reduction } until epsilon drops below mutualInducedTargetEpsilon or the
        iteration limit is exceeded.

   On return mutualInducedDone, mutualInducedConverged and (iterative path only)
   mutualInducedCurrentEpsilon are set on amoebaGpu.

   @param amoebaGpu        amoebaGpu context

   @throws OpenMM::OpenMMException if the current epsilon is NaN

   --------------------------------------------------------------------------------------- */
static void cudaComputeAmoebaPmeMutualInducedFieldBySOR(amoebaGpuContext amoebaGpu)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // set [ E_Field & E_FieldPolar ] to [ E_Field & E_FieldPolar ]*Polarizability
    kInitializeMutualInducedField_kernel<<< gpu->sim.blocks, gpu->sim.threads_per_block >>>(
        gpu->natoms,
        amoebaGpu->psE_Field->_pDevData,
        amoebaGpu->psE_FieldPolar->_pDevData,
        amoebaGpu->psPolarizability->_pDevData);
    LAUNCHERROR("AmoebaPmeMutualInducedFieldSetup");

    // initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar ]*Polarizability;
    // the copy status is checked so a failed copy cannot silently seed the solve with stale dipoles
    cudaError_t status;
    status = cudaMemcpy(amoebaGpu->psInducedDipole->_pDevData, amoebaGpu->psE_Field->_pDevData,
                        3*gpu->sim.paddedNumberOfAtoms*sizeof(float), cudaMemcpyDeviceToDevice);
    RTERROR(status, "cudaComputeAmoebaPmeMutualInducedFieldBySOR: cudaMemcpy: copy of E_Field to InducedDipole failed");

    status = cudaMemcpy(amoebaGpu->psInducedDipolePolar->_pDevData, amoebaGpu->psE_FieldPolar->_pDevData,
                        3*gpu->sim.paddedNumberOfAtoms*sizeof(float), cudaMemcpyDeviceToDevice);
    RTERROR(status, "cudaComputeAmoebaPmeMutualInducedFieldBySOR: cudaMemcpy: copy of E_FieldPolar to InducedDipolePolar failed");

    // if polarization type is direct, set flags signalling done and return
    if (amoebaGpu->amoebaSim.polarizationType) {
        amoebaGpu->mutualInducedDone      = 1;
        amoebaGpu->mutualInducedConverged = 1;
        kCalculateAmoebaPMEInducedDipoleField(amoebaGpu);
        return;
    }

    // ---------------------------------------------------------------------------------------

    // NOTE(review): iteration starts at 1 and the limit test is "iteration > max" after
    // each pass, so up to mutualInducedMaxIterations + 1 passes can run; kept as in the original.
    int done      = 0;
    int iteration = 1;
    while (!done) {

        // apply SOR

        cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply(amoebaGpu, amoebaGpu->psWorkVector[0], amoebaGpu->psWorkVector[1]);
        kCalculateAmoebaPMEInducedDipoleField(amoebaGpu);

        // post matrix multiply

        kSorUpdateMutualInducedField_kernel<<< gpu->sim.blocks, gpu->sim.threads_per_block >>>(
            amoebaGpu->psPolarizability->_pDevData,
            amoebaGpu->psInducedDipole->_pDevData,
            amoebaGpu->psInducedDipolePolar->_pDevData,
            amoebaGpu->psE_Field->_pDevData,
            amoebaGpu->psE_FieldPolar->_pDevData,
            amoebaGpu->psWorkVector[0]->_pDevData,
            amoebaGpu->psWorkVector[1]->_pDevData);
        LAUNCHERROR("kSorUpdatePmeMutualInducedField");

        // get total epsilon -- performing sums on gpu (single block; one float2 of shared memory per thread)

        kReduceMutualInducedFieldDelta_kernel<<< 1, amoebaGpu->epsilonThreadsPerBlock, 2*sizeof(float)*amoebaGpu->epsilonThreadsPerBlock >>>(
            3*gpu->natoms,
            amoebaGpu->psWorkVector[0]->_pDevData,
            amoebaGpu->psWorkVector[1]->_pDevData,
            amoebaGpu->psCurrentEpsilon->_pDevData);
        LAUNCHERROR("kReducePmeMutualInducedFieldDelta");

        // Debye=48.033324f

        amoebaGpu->psCurrentEpsilon->Download();
        float currentEpsilon                   = amoebaGpu->psCurrentEpsilon->_pSysData[0];
        amoebaGpu->mutualInducedCurrentEpsilon = currentEpsilon;

        if (iteration > amoebaGpu->mutualInducedMaxIterations ||
            amoebaGpu->mutualInducedCurrentEpsilon < amoebaGpu->mutualInducedTargetEpsilon) {
            done = 1;
        }

        // throw exception if nan detected (NaN is the only value not equal to itself)

        if (amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon) {
            throw OpenMM::OpenMMException("PME induced dipole calculation detected nans.");
        }

        iteration++;
    }

    amoebaGpu->mutualInducedDone = done;

    // Convergence is decided by the epsilon test itself. The previous expression compared
    // the already post-incremented iteration counter against the limit, which reported
    // "not converged" for a solve that reached the target on one of the last allowed passes.
    amoebaGpu->mutualInducedConverged =
        (amoebaGpu->mutualInducedCurrentEpsilon < amoebaGpu->mutualInducedTargetEpsilon) ? 1 : 0;
}
/**
 * Entry point for the PME mutual induced field calculation.
 *
 * Runs the SOR solver when amoebaGpu->mutualInducedIterativeMethod is 0;
 * any other method value results in no work being done here.
 *
 * @param amoebaGpu  amoebaGpu context
 */
void cudaComputeAmoebaPmeMutualInducedField(amoebaGpuContext amoebaGpu)
{
    // only method 0 (SOR) is handled by this function
    if (amoebaGpu->mutualInducedIterativeMethod != 0) {
        return;
    }
    cudaComputeAmoebaPmeMutualInducedFieldBySOR(amoebaGpu);
}
Prev
1
…
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment