Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5ef1d4eb
Commit
5ef1d4eb
authored
Feb 08, 2017
by
Peter Eastman
Browse files
Created OpenCL implementation of dispersion PME
parent
edfcdec3
Changes
7
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
572 additions
and
133 deletions
+572
-133
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+13
-12
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+34
-34
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+17
-4
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+303
-71
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+45
-1
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+94
-11
tests/TestDispersionPME.h
tests/TestDispersionPME.h
+66
-0
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
5ef1d4eb
...
@@ -1646,8 +1646,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1646,8 +1646,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cu.setAsCurrent();
cu.setAsCurrent();
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
// Identify which exceptions are 1-4 interactions.
// Identify which exceptions are 1-4 interactions.
vector<pair<int, int> > exclusions;
vector<pair<int, int> > exclusions;
...
@@ -1664,7 +1662,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1664,7 +1662,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
int numParticles = force.getNumParticles();
doLJPME = (nonbondedMethod == LJPME);
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
vector<double4> temp(posq.getSize());
...
@@ -1683,8 +1680,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1683,8 +1680,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
posqd[i] = make_double4(0, 0, 0, charge);
posqd[i] = make_double4(0, 0, 0, charge);
else
else
posqf[i] = make_float4(0, 0, 0, (float) charge);
posqf[i] = make_float4(0, 0, 0, (float) charge);
double sig =
(float) (
0.5*sigma
)
;
double sig = 0.5*sigma;
double eps =
(float) (
2.0*sqrt(epsilon)
)
;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = make_float2(sig, eps);
sigmaEpsilonVector[i] = make_float2(sig, eps);
exclusionList[i].push_back(i);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
sumSquaredCharges += charge*charge;
...
@@ -1701,8 +1698,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1701,8 +1698,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
posq.upload(&temp[0]);
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
sigmaEpsilon->upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME);
map<string, string> defines;
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
...
@@ -1761,7 +1760,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1761,7 +1760,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
// Compute the PME parameters.
// Compute the PME parameters.
//
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
...
@@ -1907,6 +1906,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1907,6 +1906,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
else {
else {
fft = new CudaFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ, true);
fft = new CudaFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
}
}
...
@@ -1928,6 +1928,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1928,6 +1928,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
}
}
hasInitializedFFT = true;
hasInitializedFFT = true;
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
for (int grid = 0; grid < 2; grid++) {
for (int grid = 0; grid < 2; grid++) {
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
5ef1d4eb
...
@@ -76,7 +76,7 @@
...
@@ -76,7 +76,7 @@
ljEnergy
*=
switchValue
;
ljEnergy
*=
switchValue
;
}
}
#endif
#endif
#if DO_LJPME
#if DO_LJPME
// The multiplicative grid term
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
...
@@ -91,7 +91,7 @@
...
@@ -91,7 +91,7 @@
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
#else
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
5ef1d4eb
...
@@ -576,8 +576,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
...
@@ -576,8 +576,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
OpenCLCalcNonbondedForceKernel
();
~
OpenCLCalcNonbondedForceKernel
();
/**
/**
...
@@ -649,6 +650,9 @@ private:
...
@@ -649,6 +650,9 @@ private:
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeDispersionBsplineModuliX
;
OpenCLArray
*
pmeDispersionBsplineModuliY
;
OpenCLArray
*
pmeDispersionBsplineModuliZ
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLArray
*
pmeAtomGridIndex
;
...
@@ -657,26 +661,35 @@ private:
...
@@ -657,26 +661,35 @@ private:
cl
::
CommandQueue
pmeQueue
;
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
cl
::
Event
pmeSyncEvent
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
dispersionFft
;
Kernel
cpuPme
;
Kernel
cpuPme
;
Kernel
cpuDispersionPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
SyncQueuePostComputation
*
syncQueue
;
SyncQueuePostComputation
*
syncQueue
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeDispersionAtomRangeKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeDispersionZIndexKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeDispersionUpdateBsplinesKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionFinishSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeDispersionConvolutionKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeDispersionEvalEnergyKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeDispersionInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
,
doLJPME
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
5ef1d4eb
This diff is collapsed.
Click to expand it.
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
5ef1d4eb
...
@@ -22,6 +22,26 @@
...
@@ -22,6 +22,26 @@
const
real
erfcAlphaR
=
(
0.254829592f+
(
-0.284496736f+
(
1.421413741f+
(
-1.453152027f+1.061405429f*t
)
*t
)
*t
)
*t
)
*t*
expAlphaRSqr
;
const
real
erfcAlphaR
=
(
0.254829592f+
(
-0.284496736f+
(
1.421413741f+
(
-1.453152027f+1.061405429f*t
)
*t
)
*t
)
*t
)
*t*
expAlphaRSqr
;
#
endif
#
endif
real
tempForce
=
0
;
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
//
The
multiplicative
term
to
correct
for
the
multiplicative
terms
that
are
always
//
present
in
reciprocal
space.
The
real
terms
have
an
additive
contribution
//
added
in,
but
for
excluded
terms
the
multiplicative
term
is
just
subtracted.
//
These
factors
are
needed
in
both
clauses
of
the
needCorrection
statement,
so
//
I
declare
them
up
here.
#
if
DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA*r
;
const
real
dar2
=
dispersionAlphaR*dispersionAlphaR
;
const
real
dar4
=
dar2*dar2
;
const
real
dar6
=
dar4*dar2
;
const
real
invR2
=
invR*invR
;
const
real
expDar2
=
EXP
(
-dar2
)
;
const
float2
sigExpProd
=
sigmaEpsilon1*sigmaEpsilon2
;
const
real
c6
=
64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y
;
const
real
coef
=
invR2*invR2*invR2*c6
;
const
real
eprefac
=
1.0f
+
dar2
+
0.5f*dar4
;
const
real
dprefac
=
eprefac
+
dar6/6.0f
;
#
endif
#
endif
if
(
needCorrection
)
{
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
...
@@ -34,6 +54,13 @@
...
@@ -34,6 +54,13 @@
includeInteraction
=
false
;
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
}
}
#
if
HAS_LENNARD_JONES
#
if
DO_LJPME
//
The
multiplicative
grid
term
tempEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
#
endif
#
endif
}
}
else
{
else
{
#
if
HAS_LENNARD_JONES
#
if
HAS_LENNARD_JONES
...
@@ -41,7 +68,8 @@
...
@@ -41,7 +68,8 @@
real
sig2
=
invR*sig
;
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
real
sig6
=
sig2*sig2*sig2
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
real
epssig6
=
sig6*eps
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
real
ljEnergy
=
epssig6*
(
sig6
-
1.0f
)
;
real
ljEnergy
=
epssig6*
(
sig6
-
1.0f
)
;
#
if
USE_LJ_SWITCH
#
if
USE_LJ_SWITCH
...
@@ -53,6 +81,22 @@
...
@@ -53,6 +81,22 @@
ljEnergy
*=
switchValue
;
ljEnergy
*=
switchValue
;
}
}
#
endif
#
endif
#
if
DO_LJPME
//
The
multiplicative
grid
term
ljEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
//
The
potential
shift
accounts
for
the
step
at
the
cutoff
introduced
by
the
//
transition
from
additive
to
multiplicative
combintion
rules
and
is
only
//
needed
for
the
real
(
not
excluded
)
terms.
By
addin
these
terms
to
ljEnergy
//
instead
of
tempEnergy
here,
the
includeInteraction
mask
is
correctly
applied.
sig2
=
sig*sig
;
sig6
=
sig2*sig2*sig2*INVCUT6
;
epssig6
=
eps*sig6
;
//
The
additive
part
of
the
potential
shift
ljEnergy
+=
epssig6*
(
1.0f
-
sig6
)
;
//
The
multiplicative
part
of
the
potential
shift
ljEnergy
+=
MULTSHIFT6*c6
;
#
endif
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR,
includeInteraction
)
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR,
includeInteraction
)
;
#
else
#
else
...
...
platforms/opencl/src/kernels/pme.cl
View file @
5ef1d4eb
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
...
@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
...
@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
real4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
real4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
real4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
real4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j].w
=
pos.w
; // Storing the charge here improves cache coherency in the charge spreading kernel
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
data[j].w
=
charge
; // Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
}
}
#
endif
#
endif
...
@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
...
@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
data[PME_ORDER]
;
...
@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
...
@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int
zindex
=
gridIndex.z+iz
;
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
real
add
=
pos.w
*data[ix].x*data[iy].y*data[iz].z
;
real
add
=
charge
*data[ix].x*data[iy].y*data[iz].z
;
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
//
On
Nvidia
devices
(
at
least
Maxwell
anyway
)
,
this
split
ordering
produces
much
higher
performance.
Why?
//
On
Nvidia
devices
(
at
least
Maxwell
anyway
)
,
this
split
ordering
produces
much
higher
performance.
Why?
//
I
have
no
idea!
And
of
course
on
AMD
it
produces
slower
performance.
GPUs
are
not
meant
to
be
understood.
//
I
have
no
idea!
And
of
course
on
AMD
it
produces
slower
performance.
GPUs
are
not
meant
to
be
understood.
...
@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
...
@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
#
elif
defined
(
DEVICE_IS_CPU
)
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
if
(
firstx
==
lastx
)
...
@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
bool
hasComputedThetas
=
false
;
bool
hasComputedThetas
=
false
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
int
xindex
=
gridIndex.x+ix
;
...
@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz;
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
pmeGrid[index] += EPSILON_FACTOR*
pos.w
*data[ix].x*data[iy].y*data[iz].z;
pmeGrid[index] += EPSILON_FACTOR*
charge
*data[ix].x*data[iy].y*data[iz].z;
}
}
}
}
}
}
...
@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
}
}
#else
#else
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta) {
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
// Compute the charge on a grid point.
// Compute the charge on a grid point.
...
@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
...
@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
// real indices
...
@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
...
@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
real bz = pmeBsplineModuliZ[kz];
real bz = pmeBsplineModuliZ[kz];
real2 grid = pmeGrid[index];
real2 grid = pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz;
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
if (kx != 0 || ky != 0 || kz != 0) {
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
}
}
#endif
}
}
}
}
...
@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
...
@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
mixed energy = 0;
mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
...
@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
...
@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real bx = pmeBsplineModuliX[kx];
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz;
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) {
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
...
@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
...
@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
}
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = pmeGrid[indexInHalfComplexGrid];
real2 grid = pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
{
#ifndef USE_LJPME
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
#
endif
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
}
#
if
defined
(
USE_PME_STREAM
)
&&
!defined
(
USE_LJPME
)
#
ifdef
USE_PME_STREAM
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
#
else
#
else
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
...
@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
...
@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
...
@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
}
}
}
}
real4
totalForce
=
forceBuffers[atom]
;
real4
totalForce
=
forceBuffers[atom]
;
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
real
q
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
real
q
=
pos.w*EPSILON_FACTOR
;
real
q
=
pos.w*EPSILON_FACTOR
;
#
endif
totalForce.x
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecX.x
)
;
totalForce.x
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecX.x
)
;
totalForce.y
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y
)
;
totalForce.y
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y
)
;
totalForce.z
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z
)
;
totalForce.z
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z
)
;
...
...
tests/TestDispersionPME.h
View file @
5ef1d4eb
...
@@ -222,6 +222,71 @@ void testPMEParameters() {
...
@@ -222,6 +222,71 @@ void testPMEParameters() {
force
->
getLJPMEParametersInContext
(
context2
,
alpha
,
gridx
,
gridy
,
gridz
);
force
->
getLJPMEParametersInContext
(
context2
,
alpha
,
gridx
,
gridy
,
gridz
);
}
}
void
testCoulombAndLJ
()
{
// Create a cloud of random particles.
const
int
numParticles
=
200
;
const
double
boxWidth
=
5.0
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxWidth
,
0
,
0
),
Vec3
(
0
,
boxWidth
,
0
),
Vec3
(
0
,
0
,
boxWidth
));
NonbondedForce
*
force
=
new
NonbondedForce
();
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
double
>
charge
(
numParticles
),
sigma
(
numParticles
),
epsilon
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
charge
[
i
]
=
-
1.0
+
i
*
2.0
/
(
numParticles
-
1
);
sigma
[
i
]
=
0.1
+
0.2
*
genrand_real2
(
sfmt
);
epsilon
[
i
]
=
genrand_real2
(
sfmt
);
force
->
addParticle
(
charge
[
i
],
1.0
,
0.0
);
while
(
true
)
{
Vec3
pos
=
Vec3
(
boxWidth
*
genrand_real2
(
sfmt
),
boxWidth
*
genrand_real2
(
sfmt
),
boxWidth
*
genrand_real2
(
sfmt
));
double
minDist
=
boxWidth
;
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
Vec3
delta
=
pos
-
positions
[
j
];
minDist
=
min
(
minDist
,
sqrt
(
delta
.
dot
(
delta
)));
}
if
(
minDist
>
0.1
)
{
positions
[
i
]
=
pos
;
break
;
}
}
}
force
->
setNonbondedMethod
(
NonbondedForce
::
LJPME
);
// Compute forces and energy with only Coulomb interactions.
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Now repeat with only LJ interactions.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
setParticleParameters
(
i
,
0.0
,
sigma
[
i
],
epsilon
[
i
]);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state2
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Finally compute with both Coulomb and LJ.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
setParticleParameters
(
i
,
charge
[
i
],
sigma
[
i
],
epsilon
[
i
]);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state3
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Make sure the results agree.
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
()
+
state2
.
getPotentialEnergy
(),
state3
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
]
+
state2
.
getForces
()[
i
],
state3
.
getForces
()[
i
],
1e-5
);
}
void
make_waterbox
(
int
natoms
,
double
boxEdgeLength
,
NonbondedForce
*
forceField
,
vector
<
Vec3
>
&
positions
,
vector
<
double
>&
eps
,
vector
<
double
>&
sig
,
void
make_waterbox
(
int
natoms
,
double
boxEdgeLength
,
NonbondedForce
*
forceField
,
vector
<
Vec3
>
&
positions
,
vector
<
double
>&
eps
,
vector
<
double
>&
sig
,
vector
<
pair
<
int
,
int
>
>&
bonds
,
System
&
system
,
bool
do_electrostatics
)
{
vector
<
pair
<
int
,
int
>
>&
bonds
,
System
&
system
,
bool
do_electrostatics
)
{
const
int
RESSIZE
=
3
;
const
int
RESSIZE
=
3
;
...
@@ -1862,6 +1927,7 @@ int main(int argc, char* argv[]) {
...
@@ -1862,6 +1927,7 @@ int main(int argc, char* argv[]) {
testConvergence
();
testConvergence
();
testErrorTolerance
();
testErrorTolerance
();
testPMEParameters
();
testPMEParameters
();
testCoulombAndLJ
();
testWater2DpmeEnergiesForcesNoExclusions
();
testWater2DpmeEnergiesForcesNoExclusions
();
testWater2DpmeEnergiesForcesWithExclusions
();
testWater2DpmeEnergiesForcesWithExclusions
();
testWater125DpmeVsLongCutoffNoExclusions
();
testWater125DpmeVsLongCutoffNoExclusions
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment