Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5ef1d4eb
Commit
5ef1d4eb
authored
Feb 08, 2017
by
Peter Eastman
Browse files
Created OpenCL implementation of dispersion PME
parent
edfcdec3
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
572 additions
and
133 deletions
+572
-133
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+13
-12
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+34
-34
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+17
-4
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+303
-71
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+45
-1
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+94
-11
tests/TestDispersionPME.h
tests/TestDispersionPME.h
+66
-0
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
5ef1d4eb
...
...
@@ -1646,8 +1646,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cu.setAsCurrent();
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
// Identify which exceptions are 1-4 interactions.
vector<pair<int, int> > exclusions;
...
...
@@ -1664,7 +1662,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
doLJPME = (nonbondedMethod == LJPME);
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
...
...
@@ -1683,8 +1680,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
posqd[i] = make_double4(0, 0, 0, charge);
else
posqf[i] = make_float4(0, 0, 0, (float) charge);
double sig =
(float) (
0.5*sigma
)
;
double eps =
(float) (
2.0*sqrt(epsilon)
)
;
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = make_float2(sig, eps);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
...
...
@@ -1701,8 +1698,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME);
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
...
...
@@ -1761,7 +1760,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
// Compute the PME parameters.
//
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
...
...
@@ -1907,7 +1906,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
else {
fft = new CudaFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ, true);
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
if (doLJPME)
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
}
// Prepare for doing PME on its own stream.
...
...
@@ -1928,6 +1928,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
}
hasInitializedFFT = true;
// Initialize the b-spline moduli.
for (int grid = 0; grid < 2; grid++) {
...
...
@@ -2025,11 +2026,11 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
// Initialize the exceptions.
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
5ef1d4eb
...
...
@@ -18,24 +18,24 @@
#endif
real
tempForce
=
0.0
f
;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
#endif
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
...
...
@@ -76,22 +76,22 @@
ljEnergy
*=
switchValue
;
}
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
5ef1d4eb
...
...
@@ -576,8 +576,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
@@ -649,6 +650,9 @@ private:
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeDispersionBsplineModuliX
;
OpenCLArray
*
pmeDispersionBsplineModuliY
;
OpenCLArray
*
pmeDispersionBsplineModuliZ
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
...
...
@@ -657,26 +661,35 @@ private:
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
dispersionFft
;
Kernel
cpuPme
;
Kernel
cpuDispersionPme
;
PmeIO
*
pmeio
;
SyncQueuePostComputation
*
syncQueue
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeDispersionAtomRangeKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeDispersionZIndexKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeDispersionUpdateBsplinesKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionFinishSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeDispersionConvolutionKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeDispersionEvalEnergyKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeDispersionInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
,
doLJPME
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
};
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
5ef1d4eb
This diff is collapsed.
Click to expand it.
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
5ef1d4eb
...
...
@@ -22,6 +22,26 @@
const
real
erfcAlphaR
=
(
0.254829592f+
(
-0.284496736f+
(
1.421413741f+
(
-1.453152027f+1.061405429f*t
)
*t
)
*t
)
*t
)
*t*
expAlphaRSqr
;
#
endif
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
//
The
multiplicative
term
to
correct
for
the
multiplicative
terms
that
are
always
//
present
in
reciprocal
space.
The
real
terms
have
an
additive
contribution
//
added
in,
but
for
excluded
terms
the
multiplicative
term
is
just
subtracted.
//
These
factors
are
needed
in
both
clauses
of
the
needCorrection
statement,
so
//
I
declare
them
up
here.
#
if
DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA*r
;
const
real
dar2
=
dispersionAlphaR*dispersionAlphaR
;
const
real
dar4
=
dar2*dar2
;
const
real
dar6
=
dar4*dar2
;
const
real
invR2
=
invR*invR
;
const
real
expDar2
=
EXP
(
-dar2
)
;
const
float2
sigExpProd
=
sigmaEpsilon1*sigmaEpsilon2
;
const
real
c6
=
64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y
;
const
real
coef
=
invR2*invR2*invR2*c6
;
const
real
eprefac
=
1.0f
+
dar2
+
0.5f*dar4
;
const
real
dprefac
=
eprefac
+
dar6/6.0f
;
#
endif
#
endif
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
...
...
@@ -34,6 +54,13 @@
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
}
#
if
HAS_LENNARD_JONES
#
if
DO_LJPME
//
The
multiplicative
grid
term
tempEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
#
endif
#
endif
}
else
{
#
if
HAS_LENNARD_JONES
...
...
@@ -41,7 +68,8 @@
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
real
epssig6
=
sig6*eps
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
real
ljEnergy
=
epssig6*
(
sig6
-
1.0f
)
;
#
if
USE_LJ_SWITCH
...
...
@@ -53,6 +81,22 @@
ljEnergy
*=
switchValue
;
}
#
endif
#
if
DO_LJPME
//
The
multiplicative
grid
term
ljEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
//
The
potential
shift
accounts
for
the
step
at
the
cutoff
introduced
by
the
//
transition
from
additive
to
multiplicative
combintion
rules
and
is
only
//
needed
for
the
real
(
not
excluded
)
terms.
By
addin
these
terms
to
ljEnergy
//
instead
of
tempEnergy
here,
the
includeInteraction
mask
is
correctly
applied.
sig2
=
sig*sig
;
sig6
=
sig2*sig2*sig2*INVCUT6
;
epssig6
=
eps*sig6
;
//
The
additive
part
of
the
potential
shift
ljEnergy
+=
epssig6*
(
1.0f
-
sig6
)
;
//
The
multiplicative
part
of
the
potential
shift
ljEnergy
+=
MULTSHIFT6*c6
;
#
endif
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR,
includeInteraction
)
;
#
else
...
...
platforms/opencl/src/kernels/pme.cl
View file @
5ef1d4eb
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
...
...
@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
real4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
real4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j].w
=
pos.w
; // Storing the charge here improves cache coherency in the charge spreading kernel
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
data[j].w
=
charge
; // Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
}
#
endif
...
...
@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
...
...
@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
...
...
@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
real
add
=
pos.w
*data[ix].x*data[iy].y*data[iz].z
;
real
add
=
charge
*data[ix].x*data[iy].y*data[iz].z
;
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
//
On
Nvidia
devices
(
at
least
Maxwell
anyway
)
,
this
split
ordering
produces
much
higher
performance.
Why?
//
I
have
no
idea!
And
of
course
on
AMD
it
produces
slower
performance.
GPUs
are
not
meant
to
be
understood.
...
...
@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
...
...
@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
bool
hasComputedThetas
=
false
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
...
...
@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
pmeGrid[index] += EPSILON_FACTOR*
pos.w
*data[ix].x*data[iy].y*data[iz].z;
pmeGrid[index] += EPSILON_FACTOR*
charge
*data[ix].x*data[iy].y*data[iz].z;
}
}
}
...
...
@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
}
#else
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta) {
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
// Compute the charge on a grid point.
...
...
@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
...
...
@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
real bz = pmeBsplineModuliZ[kz];
real2 grid = pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
}
#endif
}
}
...
...
@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
...
...
@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
...
...
@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
{
#ifndef USE_LJPME
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
#
endif
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
#
ifdef
USE_PME_STREAM
#
if
def
ined
(
USE_PME_STREAM
)
&&
!defined
(
USE_LJPME
)
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
#
else
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
...
...
@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
...
@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
}
}
real4
totalForce
=
forceBuffers[atom]
;
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
real
q
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
real
q
=
pos.w*EPSILON_FACTOR
;
#
endif
totalForce.x
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecX.x
)
;
totalForce.y
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y
)
;
totalForce.z
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z
)
;
...
...
tests/TestDispersionPME.h
View file @
5ef1d4eb
...
...
@@ -222,6 +222,71 @@ void testPMEParameters() {
force
->
getLJPMEParametersInContext
(
context2
,
alpha
,
gridx
,
gridy
,
gridz
);
}
void
testCoulombAndLJ
()
{
// Create a cloud of random particles.
const
int
numParticles
=
200
;
const
double
boxWidth
=
5.0
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxWidth
,
0
,
0
),
Vec3
(
0
,
boxWidth
,
0
),
Vec3
(
0
,
0
,
boxWidth
));
NonbondedForce
*
force
=
new
NonbondedForce
();
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
double
>
charge
(
numParticles
),
sigma
(
numParticles
),
epsilon
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
charge
[
i
]
=
-
1.0
+
i
*
2.0
/
(
numParticles
-
1
);
sigma
[
i
]
=
0.1
+
0.2
*
genrand_real2
(
sfmt
);
epsilon
[
i
]
=
genrand_real2
(
sfmt
);
force
->
addParticle
(
charge
[
i
],
1.0
,
0.0
);
while
(
true
)
{
Vec3
pos
=
Vec3
(
boxWidth
*
genrand_real2
(
sfmt
),
boxWidth
*
genrand_real2
(
sfmt
),
boxWidth
*
genrand_real2
(
sfmt
));
double
minDist
=
boxWidth
;
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
Vec3
delta
=
pos
-
positions
[
j
];
minDist
=
min
(
minDist
,
sqrt
(
delta
.
dot
(
delta
)));
}
if
(
minDist
>
0.1
)
{
positions
[
i
]
=
pos
;
break
;
}
}
}
force
->
setNonbondedMethod
(
NonbondedForce
::
LJPME
);
// Compute forces and energy with only Coulomb interactions.
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Now repeat with only LJ interactions.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
setParticleParameters
(
i
,
0.0
,
sigma
[
i
],
epsilon
[
i
]);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state2
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Finally compute with both Coulomb and LJ.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
setParticleParameters
(
i
,
charge
[
i
],
sigma
[
i
],
epsilon
[
i
]);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state3
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Make sure the results agree.
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
()
+
state2
.
getPotentialEnergy
(),
state3
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
]
+
state2
.
getForces
()[
i
],
state3
.
getForces
()[
i
],
1e-5
);
}
void
make_waterbox
(
int
natoms
,
double
boxEdgeLength
,
NonbondedForce
*
forceField
,
vector
<
Vec3
>
&
positions
,
vector
<
double
>&
eps
,
vector
<
double
>&
sig
,
vector
<
pair
<
int
,
int
>
>&
bonds
,
System
&
system
,
bool
do_electrostatics
)
{
const
int
RESSIZE
=
3
;
...
...
@@ -1862,6 +1927,7 @@ int main(int argc, char* argv[]) {
testConvergence
();
testErrorTolerance
();
testPMEParameters
();
testCoulombAndLJ
();
testWater2DpmeEnergiesForcesNoExclusions
();
testWater2DpmeEnergiesForcesWithExclusions
();
testWater125DpmeVsLongCutoffNoExclusions
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment