Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
da28ce0e
Commit
da28ce0e
authored
Jun 07, 2019
by
Peter Eastman
Browse files
Revised how exclusions are computed with PME
parent
df52ff73
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
319 additions
and
181 deletions
+319
-181
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+42
-3
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+50
-82
platforms/cuda/src/kernels/nonbondedParameters.cu
platforms/cuda/src/kernels/nonbondedParameters.cu
+26
-0
platforms/cuda/src/kernels/pmeExclusions.cu
platforms/cuda/src/kernels/pmeExclusions.cu
+34
-0
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+2
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+55
-16
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+48
-80
platforms/opencl/src/kernels/nonbondedParameters.cl
platforms/opencl/src/kernels/nonbondedParameters.cl
+26
-0
platforms/opencl/src/kernels/pmeExclusions.cl
platforms/opencl/src/kernels/pmeExclusions.cl
+34
-0
No files found.
platforms/cuda/include/CudaKernels.h
View file @
da28ce0e
...
...
@@ -678,6 +678,8 @@ private:
CudaArray
charges
;
CudaArray
sigmaEpsilon
;
CudaArray
exceptionParams
;
CudaArray
exclusionAtoms
;
CudaArray
exclusionParams
;
CudaArray
baseParticleParams
;
CudaArray
baseExceptionParams
;
CudaArray
particleParamOffsets
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
da28ce0e
...
...
@@ -1665,6 +1665,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
...
...
@@ -1929,7 +1931,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
for
(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
...
...
@@ -1957,6 +1959,37 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<int2>(cu, numExclusions, "exclusionAtoms");
exclusionParams.initialize<float4>(cu, numExclusions, "exclusionParams");
vector<int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = make_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exclusionParams.getDevicePointer(), "float4");
replacements["EWALD_ALPHA"] = cu.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
...
...
@@ -1967,7 +2000,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
...
...
@@ -2104,14 +2136,21 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
int numExceptions, numExclusions;
if (exceptionParams.isInitialized()) {
int
numExceptions = exceptionParams.getSize();
numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
if (exclusionParams.isInitialized()) {
numExclusions = exclusionParams.getSize();
paramsArgs.push_back(&numExclusions);
paramsArgs.push_back(&exclusionAtoms.getDevicePointer());
paramsArgs.push_back(&exclusionParams.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, ¶msArgs[0], cu.getPaddedNumAtoms());
if (usePmeStream) {
cuEventRecord(paramsSyncEvent, cu.getCurrentStream());
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
da28ce0e
{
#if USE_EWALD
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
unsigned
int
includeInteraction
=
((
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
);
unsigned
int
includeInteraction
=
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
);
const
real
alphaR
=
EWALD_ALPHA
*
r
;
const
real
expAlphaRSqr
=
EXP
(
-
alphaR
*
alphaR
);
#if HAS_COULOMB
...
...
@@ -22,89 +21,58 @@
#endif
real
tempForce
=
0.0
f
;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
SIGMA_EPSILON1
*
SIGMA_EPSILON2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
#endif
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if
(
1
-
erfcAlphaR
>
1e-6
)
{
real
erfAlphaR
=
ERF
(
alphaR
);
// Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce
=
-
prefactor
*
(
erfAlphaR
-
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
-
prefactor
*
erfAlphaR
;
}
else
{
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI
*
EWALD_ALPHA
*
138.935456
f
*
CHARGE1
*
CHARGE2
;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
#endif
#endif
real
sig
=
SIGMA_EPSILON1
.
x
+
SIGMA_EPSILON2
.
x
;
real
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
real
sig6
=
sig2
*
sig2
*
sig2
;
real
eps
=
SIGMA_EPSILON1
.
y
*
SIGMA_EPSILON2
.
y
;
real
epssig6
=
sig6
*
eps
;
tempForce
=
epssig6
*
(
12.0
f
*
sig6
-
6.0
f
);
real
ljEnergy
=
epssig6
*
(
sig6
-
1.0
f
);
#if USE_LJ_SWITCH
if
(
r
>
LJ_SWITCH_CUTOFF
)
{
real
x
=
r
-
LJ_SWITCH_CUTOFF
;
real
switchValue
=
1
+
x
*
x
*
x
*
(
LJ_SWITCH_C3
+
x
*
(
LJ_SWITCH_C4
+
x
*
LJ_SWITCH_C5
));
real
switchDeriv
=
x
*
x
*
(
3
*
LJ_SWITCH_C3
+
x
*
(
4
*
LJ_SWITCH_C4
+
x
*
5
*
LJ_SWITCH_C5
));
tempForce
=
tempForce
*
switchValue
-
ljEnergy
*
switchDeriv
*
r
;
ljEnergy
*=
switchValue
;
}
else
{
#if HAS_LENNARD_JONES
real
sig
=
SIGMA_EPSILON1
.
x
+
SIGMA_EPSILON2
.
x
;
real
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
real
sig6
=
sig2
*
sig2
*
sig2
;
real
eps
=
SIGMA_EPSILON1
.
y
*
SIGMA_EPSILON2
.
y
;
real
epssig6
=
sig6
*
eps
;
tempForce
=
epssig6
*
(
12.0
f
*
sig6
-
6.0
f
);
real
ljEnergy
=
epssig6
*
(
sig6
-
1.0
f
);
#if USE_LJ_SWITCH
if
(
r
>
LJ_SWITCH_CUTOFF
)
{
real
x
=
r
-
LJ_SWITCH_CUTOFF
;
real
switchValue
=
1
+
x
*
x
*
x
*
(
LJ_SWITCH_C3
+
x
*
(
LJ_SWITCH_C4
+
x
*
LJ_SWITCH_C5
));
real
switchDeriv
=
x
*
x
*
(
3
*
LJ_SWITCH_C3
+
x
*
(
4
*
LJ_SWITCH_C4
+
x
*
5
*
LJ_SWITCH_C5
));
tempForce
=
tempForce
*
switchValue
-
ljEnergy
*
switchDeriv
*
r
;
ljEnergy
*=
switchValue
;
}
#endif
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space.
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
SIGMA_EPSILON1
*
SIGMA_EPSILON2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
tempForce
=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
prefactor
*
erfcAlphaR
:
0
;
tempForce
=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
prefactor
*
erfcAlphaR
:
0
;
#endif
}
dEdR
+=
includeInteraction
?
tempForce
*
invR
*
invR
:
0
;
#else
#ifdef USE_CUTOFF
...
...
@@ -113,7 +81,7 @@
unsigned
int
includeInteraction
=
(
!
isExcluded
);
#endif
real
tempForce
=
0.0
f
;
#if HAS_LENNARD_JONES
#if HAS_LENNARD_JONES
real
sig
=
SIGMA_EPSILON1
.
x
+
SIGMA_EPSILON2
.
x
;
real
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
...
...
@@ -131,7 +99,7 @@
}
#endif
tempEnergy
+=
ljEnergy
;
#endif
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const
real
prefactor
=
138.935456
f
*
CHARGE1
*
CHARGE2
;
...
...
platforms/cuda/src/kernels/nonbondedParameters.cu
View file @
da28ce0e
...
...
@@ -7,6 +7,9 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
#ifdef HAS_EXCEPTIONS
,
int
numExceptions
,
const
float4
*
__restrict__
baseExceptionParams
,
float4
*
__restrict__
exceptionParams
,
float4
*
__restrict__
exceptionParamOffsets
,
int
*
__restrict__
exceptionOffsetIndices
#endif
#ifdef HAS_EXCLUSIONS
,
int
numExclusions
,
const
int2
*
__restrict__
exclusionAtoms
,
float4
*
__restrict__
exclusionParams
#endif
)
{
mixed
energy
=
0
;
...
...
@@ -60,6 +63,29 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
exceptionParams
[
i
]
=
make_float4
((
float
)
(
138.935456
f
*
params
.
x
),
(
float
)
params
.
y
,
(
float
)
(
4
*
params
.
z
),
0
);
}
#endif
// Compute parameters for subtracting the reciprocal part of excluded interactions.
#ifdef HAS_EXCLUSIONS
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numExclusions
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int2
atoms
=
exclusionAtoms
[
i
];
#ifdef USE_POSQ_CHARGES
real
chargeProd
=
posq
[
atoms
.
x
].
w
*
posq
[
atoms
.
y
].
w
;
#else
real
chargeProd
=
charge
[
atoms
.
x
]
*
charge
[
atoms
.
y
];
#endif
#ifdef INCLUDE_LJPME
float2
sigEps1
=
sigmaEpsilon
[
atoms
.
x
];
float2
sigEps2
=
sigmaEpsilon
[
atoms
.
y
];
float
sigma
=
sigEps1
.
x
*
sigEps2
.
x
;
float
epsilon
=
sigEps1
.
y
*
sigEps2
.
y
;
#else
float
sigma
=
0
;
float
epsilon
=
0
;
#endif
exclusionParams
[
i
]
=
make_float4
((
float
)
(
138.935456
f
*
chargeProd
),
sigma
,
epsilon
,
0
);
}
#endif
if
(
includeSelfEnergy
)
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
platforms/cuda/src/kernels/pmeExclusions.cu
0 → 100644
View file @
da28ce0e
const
float4
exclusionParams
=
PARAMS
[
index
];
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
const
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
const
real
invR
=
RSQRT
(
r2
);
const
real
r
=
r2
*
invR
;
const
real
alphaR
=
EWALD_ALPHA
*
r
;
const
real
expAlphaRSqr
=
EXP
(
-
alphaR
*
alphaR
);
const
real
erfAlphaR
=
ERF
(
alphaR
);
real
tempForce
=
0.0
f
;
if
(
erfAlphaR
>
1e-6
f
)
{
const
real
prefactor
=
exclusionParams
.
x
*
invR
;
tempForce
=
-
prefactor
*
(
erfAlphaR
-
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
energy
-=
prefactor
*
erfAlphaR
;
}
else
{
energy
-=
TWO_OVER_SQRT_PI
*
EWALD_ALPHA
*
exclusionParams
.
x
;
}
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
real
c6
=
64
*
exclusionParams
.
y
*
exclusionParams
.
y
*
exclusionParams
.
y
*
exclusionParams
.
z
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
energy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
#endif
delta
*=
tempForce
*
invR
*
invR
;
real3
force1
=
-
delta
;
real3
force2
=
delta
;
platforms/opencl/include/OpenCLKernels.h
View file @
da28ce0e
...
...
@@ -655,6 +655,8 @@ private:
OpenCLArray
charges
;
OpenCLArray
sigmaEpsilon
;
OpenCLArray
exceptionParams
;
OpenCLArray
exclusionAtoms
;
OpenCLArray
exclusionParams
;
OpenCLArray
baseParticleParams
;
OpenCLArray
baseExceptionParams
;
OpenCLArray
particleParamOffsets
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
da28ce0e
...
...
@@ -1655,6 +1655,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
...
...
@@ -1853,7 +1855,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
for
(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
...
...
@@ -1883,6 +1885,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector<vector<int> > atoms(numExclusions, vector<int>(2));
exclusionAtoms.initialize<mm_int2>(cl, numExclusions, "exclusionAtoms");
exclusionParams.initialize<mm_float4>(cl, numExclusions, "exclusionParams");
vector<mm_int2> exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = mm_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exclusionParams.getDeviceBuffer(), "float4");
replacements["EWALD_ALPHA"] = cl.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cl.doubleToString(dispersionAlpha);
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
...
...
@@ -1893,7 +1926,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
...
...
@@ -2012,21 +2044,28 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) {
hasInitializedKernel = true;
computeParamsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(2, globalParams.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(3, cl.getPaddedNumAtoms());
computeParamsKernel.setArg<cl::Buffer>(4, baseParticleParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(5, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(6, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(7, sigmaEpsilon.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(8, particleParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(9, particleOffsetIndices.getDeviceBuffer());
int index = 0;
computeParamsKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
index++;
computeParamsKernel.setArg<cl::Buffer>(index++, globalParams.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
computeParamsKernel.setArg<cl::Buffer>(index++, baseParticleParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, sigmaEpsilon.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, particleOffsetIndices.getDeviceBuffer());
if (exceptionParams.isInitialized()) {
computeParamsKernel.setArg<cl_int>(10, exceptionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(11, baseExceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(12, exceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(13, exceptionParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(14, exceptionOffsetIndices.getDeviceBuffer());
computeParamsKernel.setArg<cl_int>(index++, exceptionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(index++, baseExceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParams.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionParamOffsets.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionOffsetIndices.getDeviceBuffer());
}
if (exclusionParams.isInitialized()) {
computeParamsKernel.setArg<cl_int>(index++, exclusionParams.getSize());
computeParamsKernel.setArg<cl::Buffer>(index++, exclusionAtoms.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exclusionParams.getDeviceBuffer());
}
if (cosSinSums.isInitialized()) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
da28ce0e
...
...
@@ -5,8 +5,7 @@
unsigned
int
includeInteraction
;
#
endif
#
if
USE_EWALD
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
includeInteraction
=
((
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
;
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
#
if
HAS_COULOMB
...
...
@@ -27,89 +26,58 @@
#
endif
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
//
The
multiplicative
term
to
correct
for
the
multiplicative
terms
that
are
always
//
present
in
reciprocal
space.
The
real
terms
have
an
additive
contribution
//
added
in,
but
for
excluded
terms
the
multiplicative
term
is
just
subtracted.
//
These
factors
are
needed
in
both
clauses
of
the
needCorrection
statement,
so
//
I
declare
them
up
here.
#
if
DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA*r
;
const
real
dar2
=
dispersionAlphaR*dispersionAlphaR
;
const
real
dar4
=
dar2*dar2
;
const
real
dar6
=
dar4*dar2
;
const
real
invR2
=
invR*invR
;
const
real
expDar2
=
EXP
(
-dar2
)
;
const
float2
sigExpProd
=
SIGMA_EPSILON1*SIGMA_EPSILON2
;
const
real
c6
=
64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y
;
const
real
coef
=
invR2*invR2*invR2*c6
;
const
real
eprefac
=
1.0f
+
dar2
+
0.5f*dar4
;
const
real
dprefac
=
eprefac
+
dar6/6.0f
;
#
endif
#
endif
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
#
if
HAS_COULOMB
if
(
1-erfcAlphaR
>
1e-6
)
{
real
erfAlphaR
=
erf
(
alphaR
)
; // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce
=
-prefactor*
(
erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
-prefactor*erfAlphaR
;
}
else
{
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2
;
}
#
endif
#
if
HAS_LENNARD_JONES
#
if
DO_LJPME
//
The
multiplicative
grid
term
tempEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
#
endif
#
endif
real
sig
=
SIGMA_EPSILON1.x
+
SIGMA_EPSILON2.x
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
real
sig6
=
sig2*sig2*sig2
;
real
eps
=
SIGMA_EPSILON1.y*SIGMA_EPSILON2.y
;
real
epssig6
=
sig6*eps
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
real
ljEnergy
=
epssig6*
(
sig6
-
1.0f
)
;
#
if
USE_LJ_SWITCH
if
(
r
>
LJ_SWITCH_CUTOFF
)
{
real
x
=
r-LJ_SWITCH_CUTOFF
;
real
switchValue
=
1+x*x*x*
(
LJ_SWITCH_C3+x*
(
LJ_SWITCH_C4+x*LJ_SWITCH_C5
))
;
real
switchDeriv
=
x*x*
(
3*LJ_SWITCH_C3+x*
(
4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5
))
;
tempForce
=
tempForce*switchValue
-
ljEnergy*switchDeriv*r
;
ljEnergy
*=
switchValue
;
}
else
{
#
if
HAS_LENNARD_JONES
real
sig
=
SIGMA_EPSILON1.x
+
SIGMA_EPSILON2.x
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
real
sig6
=
sig2*sig2*sig2
;
real
eps
=
SIGMA_EPSILON1.y*SIGMA_EPSILON2.y
;
real
epssig6
=
sig6*eps
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
real
ljEnergy
=
epssig6*
(
sig6
-
1.0f
)
;
#
if
USE_LJ_SWITCH
if
(
r
>
LJ_SWITCH_CUTOFF
)
{
real
x
=
r-LJ_SWITCH_CUTOFF
;
real
switchValue
=
1+x*x*x*
(
LJ_SWITCH_C3+x*
(
LJ_SWITCH_C4+x*LJ_SWITCH_C5
))
;
real
switchDeriv
=
x*x*
(
3*LJ_SWITCH_C3+x*
(
4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5
))
;
tempForce
=
tempForce*switchValue
-
ljEnergy*switchDeriv*r
;
ljEnergy
*=
switchValue
;
}
#
endif
#
endif
#
if
DO_LJPME
//
The
multiplicative
grid
term
ljEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
//
The
potential
shift
accounts
for
the
step
at
the
cutoff
introduced
by
the
//
transition
from
additive
to
multiplicative
combintion
rules
and
is
only
//
needed
for
the
real
(
not
excluded
)
terms.
By
addin
these
terms
to
ljEnergy
//
instead
of
tempEnergy
here,
the
includeInteraction
mask
is
correctly
applied.
sig2
=
sig*sig
;
sig6
=
sig2*sig2*sig2*INVCUT6
;
epssig6
=
eps*sig6
;
//
The
additive
part
of
the
potential
shift
ljEnergy
+=
epssig6*
(
1.0f
-
sig6
)
;
//
The
multiplicative
part
of
the
potential
shift
ljEnergy
+=
MULTSHIFT6*c6
;
//
The
multiplicative
term
to
correct
for
the
multiplicative
terms
that
are
always
//
present
in
reciprocal
space.
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA*r
;
const
real
dar2
=
dispersionAlphaR*dispersionAlphaR
;
const
real
dar4
=
dar2*dar2
;
const
real
dar6
=
dar4*dar2
;
const
real
invR2
=
invR*invR
;
const
real
expDar2
=
EXP
(
-dar2
)
;
const
float2
sigExpProd
=
SIGMA_EPSILON1*SIGMA_EPSILON2
;
const
real
c6
=
64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y
;
const
real
coef
=
invR2*invR2*invR2*c6
;
const
real
eprefac
=
1.0f
+
dar2
+
0.5f*dar4
;
const
real
dprefac
=
eprefac
+
dar6/6.0f
;
//
The
multiplicative
grid
term
ljEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
//
The
potential
shift
accounts
for
the
step
at
the
cutoff
introduced
by
the
//
transition
from
additive
to
multiplicative
combintion
rules
and
is
only
//
needed
for
the
real
(
not
excluded
)
terms.
By
addin
these
terms
to
ljEnergy
//
instead
of
tempEnergy
here,
the
includeInteraction
mask
is
correctly
applied.
sig2
=
sig*sig
;
sig6
=
sig2*sig2*sig2*INVCUT6
;
epssig6
=
eps*sig6
;
//
The
additive
part
of
the
potential
shift
ljEnergy
+=
epssig6*
(
1.0f
-
sig6
)
;
//
The
multiplicative
part
of
the
potential
shift
ljEnergy
+=
MULTSHIFT6*c6
;
#
endif
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR,
includeInteraction
)
;
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR,
includeInteraction
)
;
#
else
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
prefactor*erfcAlphaR,
includeInteraction
)
;
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
prefactor*erfcAlphaR,
includeInteraction
)
;
#
endif
}
dEdR
+=
select
((
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
#
else
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/nonbondedParameters.cl
View file @
da28ce0e
...
...
@@ -7,6 +7,9 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
#
ifdef
HAS_EXCEPTIONS
,
int
numExceptions,
__global
const
float4*
restrict
baseExceptionParams,
__global
float4*
restrict
exceptionParams,
__global
float4*
restrict
exceptionParamOffsets,
__global
int*
restrict
exceptionOffsetIndices
#
endif
#
ifdef
HAS_EXCLUSIONS
,
int
numExclusions,
__global
const
int2*
restrict
exclusionAtoms,
__global
float4*
restrict
exclusionParams
#
endif
)
{
mixed
energy
=
0
;
...
...
@@ -60,6 +63,29 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
exceptionParams[i]
=
(
float4
)
((
float
)
(
138.935456f*params.x
)
,
(
float
)
params.y,
(
float
)
(
4*params.z
)
,
0
)
;
}
#
endif
//
Compute
parameters
for
subtracting
the
reciprocal
part
of
excluded
interactions.
#
ifdef
HAS_EXCLUSIONS
for
(
int
i
=
get_global_id
(
0
)
; i < numExclusions; i += get_global_size(0)) {
int2
atoms
=
exclusionAtoms[i]
;
#
ifdef
USE_POSQ_CHARGES
real
chargeProd
=
posq[atoms.x].w*posq[atoms.y].w
;
#
else
real
chargeProd
=
charge[atoms.x]*charge[atoms.y]
;
#
endif
#
ifdef
INCLUDE_LJPME
float2
sigEps1
=
sigmaEpsilon[atoms.x]
;
float2
sigEps2
=
sigmaEpsilon[atoms.y]
;
float
sigma
=
sigEps1.x*sigEps2.x
;
float
epsilon
=
sigEps1.y*sigEps2.y
;
#
else
float
sigma
=
0
;
float
epsilon
=
0
;
#
endif
exclusionParams[i]
=
(
float4
)
((
float
)
(
138.935456f*chargeProd
)
,
sigma,
epsilon,
0
)
;
}
#
endif
if
(
includeSelfEnergy
)
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
platforms/opencl/src/kernels/pmeExclusions.cl
0 → 100644
View file @
da28ce0e
const
float4
exclusionParams
=
PARAMS[index]
;
real3
delta
=
(
real3
)
(
pos2.x-pos1.x,
pos2.y-pos1.y,
pos2.z-pos1.z
)
;
const
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
const
real
invR
=
RSQRT
(
r2
)
;
const
real
r
=
r2*invR
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
erfAlphaR
=
erf
(
alphaR
)
;
real
tempForce
=
0.0f
;
if
(
erfAlphaR
>
1e-6f
)
{
const
real
prefactor
=
exclusionParams.x*invR
;
tempForce
=
-prefactor*
(
erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
energy
-=
prefactor*erfAlphaR
;
}
else
{
energy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x
;
}
#
if
DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA*r
;
const
real
dar2
=
dispersionAlphaR*dispersionAlphaR
;
const
real
dar4
=
dar2*dar2
;
const
real
dar6
=
dar4*dar2
;
const
real
invR2
=
invR*invR
;
const
real
expDar2
=
EXP
(
-dar2
)
;
const
real
c6
=
64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z
;
const
real
coef
=
invR2*invR2*invR2*c6
;
const
real
eprefac
=
1.0f
+
dar2
+
0.5f*dar4
;
const
real
dprefac
=
eprefac
+
dar6/6.0f
;
energy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
#
endif
delta
*=
tempForce*invR*invR
;
real3
force1
=
-delta
;
real3
force2
=
delta
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment