Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5ef1d4eb
"docs-source/usersguide/vscode:/vscode.git/clone" did not exist on "cc8cdf60b00af9ed6a8a350da683e94bcc3fb481"
Commit
5ef1d4eb
authored
Feb 08, 2017
by
Peter Eastman
Browse files
Created OpenCL implementation of dispersion PME
parent
edfcdec3
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
572 additions
and
133 deletions
+572
-133
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+13
-12
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+34
-34
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+17
-4
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+303
-71
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+45
-1
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+94
-11
tests/TestDispersionPME.h
tests/TestDispersionPME.h
+66
-0
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
5ef1d4eb
...
...
@@ -1646,8 +1646,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cu.setAsCurrent();
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
// Identify which exceptions are 1-4 interactions.
vector<pair<int, int> > exclusions;
...
...
@@ -1664,7 +1662,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
doLJPME = (nonbondedMethod == LJPME);
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
...
...
@@ -1683,8 +1680,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
posqd[i] = make_double4(0, 0, 0, charge);
else
posqf[i] = make_float4(0, 0, 0, (float) charge);
double sig =
(float) (
0.5*sigma
)
;
double eps =
(float) (
2.0*sqrt(epsilon)
)
;
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = make_float2(sig, eps);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
...
...
@@ -1701,8 +1698,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME);
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
...
...
@@ -1761,7 +1760,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
// Compute the PME parameters.
//
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
...
...
@@ -1907,7 +1906,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
else {
fft = new CudaFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ, true);
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
if (doLJPME)
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
}
// Prepare for doing PME on its own stream.
...
...
@@ -1928,6 +1928,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
}
hasInitializedFFT = true;
// Initialize the b-spline moduli.
for (int grid = 0; grid < 2; grid++) {
...
...
@@ -2025,11 +2026,11 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
// Initialize the exceptions.
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
5ef1d4eb
...
...
@@ -18,24 +18,24 @@
#endif
real
tempForce
=
0.0
f
;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
#endif
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
...
...
@@ -76,22 +76,22 @@
ljEnergy
*=
switchValue
;
}
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
5ef1d4eb
...
...
@@ -576,8 +576,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
@@ -649,6 +650,9 @@ private:
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeDispersionBsplineModuliX
;
OpenCLArray
*
pmeDispersionBsplineModuliY
;
OpenCLArray
*
pmeDispersionBsplineModuliZ
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
...
...
@@ -657,26 +661,35 @@ private:
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
dispersionFft
;
Kernel
cpuPme
;
Kernel
cpuDispersionPme
;
PmeIO
*
pmeio
;
SyncQueuePostComputation
*
syncQueue
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeDispersionAtomRangeKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeDispersionZIndexKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeDispersionUpdateBsplinesKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionFinishSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeDispersionConvolutionKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeDispersionEvalEnergyKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeDispersionInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
,
doLJPME
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
};
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
5ef1d4eb
...
...
@@ -1608,6 +1608,12 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeAtomRange != NULL)
...
...
@@ -1620,6 +1626,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete sort;
if (fft != NULL)
delete fft;
if (dispersionFft != NULL)
delete dispersionFft;
if (pmeio != NULL)
delete pmeio;
}
...
...
@@ -1648,6 +1656,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms(), mm_float2(0,0));
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
...
...
@@ -1657,9 +1666,13 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
posqd[i] = mm_double4(0, 0, 0, charge);
else
posqf[i] = mm_float4(0, 0, 0, (float) charge);
sigmaEpsilonVector[i] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = mm_float2((float) sig, (float) eps);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
...
...
@@ -1677,6 +1690,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME);
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
...
...
@@ -1698,7 +1712,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["LJ_SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0)
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0
&& !doLJPME
)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
...
...
@@ -1730,18 +1744,30 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (nonbondedMethod == PME) {
else if (nonbondedMethod == PME
|| nonbondedMethod == LJPME
) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
gridSizeY = OpenCLFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = OpenCLFFT3D::findLegalDimension(gridSizeZ);
if (doLJPME) {
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = OpenCLFFT3D::findLegalDimension(dispersionGridSizeX);
dispersionGridSizeY = OpenCLFFT3D::findLegalDimension(dispersionGridSizeY);
dispersionGridSizeZ = OpenCLFFT3D::findLegalDimension(dispersionGridSizeZ);
}
defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
defines["EWALD_DISPERSION_ALPHA"] = cl.doubleToString(dispersionAlpha);
if (cl.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
pmeDefines["PME_ORDER"] = cl.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cl.intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(alpha*alpha));
...
...
@@ -1764,6 +1790,13 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeio = new PmeIO(cl, addForcesKernel);
cl.addPreComputation(new PmePreComputation(cl, cpuPme, *pmeio));
cl.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
if (doLJPME) {
cpuDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
cpuDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, numParticles, dispersionAlpha);
cl.addPreComputation(new PmePreComputation(cl, cpuDispersionPme, *pmeio));
cl.addPostComputation(new PmePostComputation(cpuDispersionPme, *pmeio));
}
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
...
...
@@ -1772,9 +1805,21 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (pmeio == NULL) {
// Create required data structures.
if (doLJPME) {
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cl.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cl.doubleToString(multShift6);
}
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
pmeGrid = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
pmeGrid2 = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid2");
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
pmeGrid = new OpenCLArray(cl, gridElements, 2*elementSize, "pmeGrid");
pmeGrid2 = new OpenCLArray(cl, gridElements, 2*elementSize, "pmeGrid2");
if (cl.getSupports64BitGlobalAtomics())
cl.addAutoclearBuffer(*pmeGrid2);
else
...
...
@@ -1782,6 +1827,11 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX = new OpenCLArray(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY = new OpenCLArray(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ = new OpenCLArray(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
...
...
@@ -1790,6 +1840,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl.clearBuffer(*pmeEnergyBuffer);
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
dispersionFft = new OpenCLFFT3D(cl, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
string vendor = cl.getDevice().getInfo<CL_DEVICE_VENDOR>();
bool isNvidia = (vendor.size() >= 6 && vendor.substr(0, 6) == "NVIDIA");
if (isNvidia)
...
...
@@ -1807,74 +1859,96 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = (float) (sc*sc+ss*ss);
for (int grid = 0; grid < 2; grid++) {
int xsize, ysize, zsize;
OpenCLArray *xmoduli, *ymoduli, *zmoduli;
if (grid == 0) {
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
}
for (int i = 0; i < ndata; i++)
{
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
else {
if (!doLJPME)
continue;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
}
if (cl.getUseDoublePrecision()) {
if (dim == 0)
pmeBsplineModuliX->upload(moduli);
else if (dim == 1)
pmeBsplineModuliY->upload(moduli);
else
pmeBsplineModuliZ->upload(moduli);
int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
else {
vector<float> modulif(ndata);
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = (float) (sc*sc+ss*ss);
}
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
pmeBsplineModuliX->upload(modulif);
else if (dim == 1)
pmeBsplineModuliY->upload(modulif);
else
pmeBsplineModuliZ->upload(modulif);
{
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
}
if (cl.getUseDoublePrecision()) {
if (dim == 0)
xmoduli->upload(moduli);
else if (dim == 1)
ymoduli->upload(moduli);
else
zmoduli->upload(moduli);
}
else {
vector<float> modulif(ndata);
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
xmoduli->upload(modulif);
else if (dim == 1)
ymoduli->upload(modulif);
else
zmoduli->upload(modulif);
}
}
}
}
...
...
@@ -1926,6 +2000,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums->getDeviceBuffer());
}
if (pmeGrid != NULL) {
// Create kernels for Coulomb PME.
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
pmeUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines");
pmeAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid");
...
...
@@ -1972,6 +2048,65 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
if (usePmeQueue)
syncQueue->setKernel(cl::Kernel(program, "addEnergy"));
if (doLJPME) {
// Create kernels for LJ PME.
pmeDefines["EWALD_ALPHA"] = cl.doubleToString(dispersionAlpha);
pmeDefines["GRID_SIZE_X"] = cl.intToString(dispersionGridSizeX);
pmeDefines["GRID_SIZE_Y"] = cl.intToString(dispersionGridSizeY);
pmeDefines["GRID_SIZE_Z"] = cl.intToString(dispersionGridSizeZ);
pmeDefines["EPSILON_FACTOR"] = "1";
pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1";
program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
pmeDispersionUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines");
pmeDispersionAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid");
pmeDispersionZIndexKernel = cl::Kernel(program, "recordZIndex");
pmeDispersionSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge");
pmeDispersionConvolutionKernel = cl::Kernel(program, "reciprocalConvolution");
pmeDispersionEvalEnergyKernel = cl::Kernel(program, "gridEvaluateEnergy");
pmeDispersionInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta->getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(12, sigmaEpsilon->getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2->getDeviceBuffer());
else
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid->getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta->getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(13, sigmaEpsilon->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(1, pmeDispersionBsplineModuliX->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliY->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliZ->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer->getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliX->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliY->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(4, pmeDispersionBsplineModuliZ->getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(12, sigmaEpsilon->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
pmeDispersionFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid->getDeviceBuffer());
}
}
}
}
if (cosSinSums != NULL && includeReciprocal) {
...
...
@@ -2042,7 +2177,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
else {
sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
...
...
@@ -2054,7 +2189,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel,
pmeGrid->get
Size
()
);
cl.executeKernel(pmeFinishSpreadChargeKernel,
gridSizeX*gridSizeY*grid
Size
Z
);
}
else {
cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms());
...
...
@@ -2104,6 +2239,103 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms());
if (doLJPME) {
setPeriodicBoxArgs(cl, pmeDispersionUpdateBsplinesKernel, 4);
if (cl.getUseDoublePrecision()) {
pmeDispersionUpdateBsplinesKernel.setArg<mm_double4>(9, recipBoxVectors[0]);
pmeDispersionUpdateBsplinesKernel.setArg<mm_double4>(10, recipBoxVectors[1]);
pmeDispersionUpdateBsplinesKernel.setArg<mm_double4>(11, recipBoxVectors[2]);
}
else {
pmeDispersionUpdateBsplinesKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[0]);
pmeDispersionUpdateBsplinesKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[1]);
pmeDispersionUpdateBsplinesKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeDispersionUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(*pmeGrid);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
}
else {
pmeDispersionSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeDispersionSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeDispersionSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(*pmeGrid2);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
}
else {
pmeDispersionSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeDispersionSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeDispersionSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeDispersionSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
}
else {
cl.clearBuffer(*pmeGrid);
cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2);
if (cl.getUseDoublePrecision())
pmeDispersionZIndexKernel.setArg<mm_double4>(3, recipBoxVectors[2]);
else
pmeDispersionZIndexKernel.setArg<mm_float4>(3, recipBoxVectorsFloat[2]);
cl.executeKernel(pmeDispersionZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeDispersionSpreadChargeKernel, cl.getNumAtoms());
}
}
dispersionFft->execFFT(*pmeGrid, *pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
pmeDispersionConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
pmeDispersionConvolutionKernel.setArg<mm_double4>(5, recipBoxVectors[1]);
pmeDispersionConvolutionKernel.setArg<mm_double4>(6, recipBoxVectors[2]);
pmeDispersionEvalEnergyKernel.setArg<mm_double4>(5, recipBoxVectors[0]);
pmeDispersionEvalEnergyKernel.setArg<mm_double4>(6, recipBoxVectors[1]);
pmeDispersionEvalEnergyKernel.setArg<mm_double4>(7, recipBoxVectors[2]);
}
else {
pmeDispersionConvolutionKernel.setArg<mm_float4>(4, recipBoxVectorsFloat[0]);
pmeDispersionConvolutionKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[1]);
pmeDispersionConvolutionKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[2]);
pmeDispersionEvalEnergyKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]);
pmeDispersionEvalEnergyKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]);
pmeDispersionEvalEnergyKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
}
if (includeEnergy)
cl.executeKernel(pmeDispersionEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(*pmeGrid2, *pmeGrid, false);
setPeriodicBoxArgs(cl, pmeDispersionInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(9, recipBoxVectors[1]);
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(10, recipBoxVectors[2]);
}
else {
pmeDispersionInterpolateForceKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[0]);
pmeDispersionInterpolateForceKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[1]);
pmeDispersionInterpolateForceKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[2]);
}
if (deviceIsCpu)
cl.executeKernel(pmeDispersionInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else
cl.executeKernel(pmeDispersionInterpolateForceKernel, cl.getNumAtoms());
}
if (usePmeQueue) {
pmeQueue.enqueueMarker(&pmeSyncEvent);
cl.restoreDefaultQueue();
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
5ef1d4eb
...
...
@@ -22,6 +22,26 @@
const
real
erfcAlphaR
=
(
0.254829592f+
(
-0.284496736f+
(
1.421413741f+
(
-1.453152027f+1.061405429f*t
)
*t
)
*t
)
*t
)
*t*
expAlphaRSqr
;
#
endif
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
//
The
multiplicative
term
to
correct
for
the
multiplicative
terms
that
are
always
//
present
in
reciprocal
space.
The
real
terms
have
an
additive
contribution
//
added
in,
but
for
excluded
terms
the
multiplicative
term
is
just
subtracted.
//
These
factors
are
needed
in
both
clauses
of
the
needCorrection
statement,
so
//
I
declare
them
up
here.
#
if
DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA*r
;
const
real
dar2
=
dispersionAlphaR*dispersionAlphaR
;
const
real
dar4
=
dar2*dar2
;
const
real
dar6
=
dar4*dar2
;
const
real
invR2
=
invR*invR
;
const
real
expDar2
=
EXP
(
-dar2
)
;
const
float2
sigExpProd
=
sigmaEpsilon1*sigmaEpsilon2
;
const
real
c6
=
64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y
;
const
real
coef
=
invR2*invR2*invR2*c6
;
const
real
eprefac
=
1.0f
+
dar2
+
0.5f*dar4
;
const
real
dprefac
=
eprefac
+
dar6/6.0f
;
#
endif
#
endif
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
...
...
@@ -34,6 +54,13 @@
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
}
#
if
HAS_LENNARD_JONES
#
if
DO_LJPME
//
The
multiplicative
grid
term
tempEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
#
endif
#
endif
}
else
{
#
if
HAS_LENNARD_JONES
...
...
@@ -41,7 +68,8 @@
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
real
epssig6
=
sig6*eps
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
real
ljEnergy
=
epssig6*
(
sig6
-
1.0f
)
;
#
if
USE_LJ_SWITCH
...
...
@@ -53,6 +81,22 @@
ljEnergy
*=
switchValue
;
}
#
endif
#
if
DO_LJPME
//
The
multiplicative
grid
term
ljEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
//
The
potential
shift
accounts
for
the
step
at
the
cutoff
introduced
by
the
//
transition
from
additive
to
multiplicative
combintion
rules
and
is
only
//
needed
for
the
real
(
not
excluded
)
terms.
By
addin
these
terms
to
ljEnergy
//
instead
of
tempEnergy
here,
the
includeInteraction
mask
is
correctly
applied.
sig2
=
sig*sig
;
sig6
=
sig2*sig2*sig2*INVCUT6
;
epssig6
=
eps*sig6
;
//
The
additive
part
of
the
potential
shift
ljEnergy
+=
epssig6*
(
1.0f
-
sig6
)
;
//
The
multiplicative
part
of
the
potential
shift
ljEnergy
+=
MULTSHIFT6*c6
;
#
endif
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR,
includeInteraction
)
;
#
else
...
...
platforms/opencl/src/kernels/pme.cl
View file @
5ef1d4eb
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
...
...
@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
real4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
real4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j].w
=
pos.w
; // Storing the charge here improves cache coherency in the charge spreading kernel
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
data[j].w
=
charge
; // Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
}
#
endif
...
...
@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
...
...
@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
...
...
@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
real
add
=
pos.w
*data[ix].x*data[iy].y*data[iz].z
;
real
add
=
charge
*data[ix].x*data[iy].y*data[iz].z
;
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
//
On
Nvidia
devices
(
at
least
Maxwell
anyway
)
,
this
split
ordering
produces
much
higher
performance.
Why?
//
I
have
no
idea!
And
of
course
on
AMD
it
produces
slower
performance.
GPUs
are
not
meant
to
be
understood.
...
...
@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
...
...
@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
bool
hasComputedThetas
=
false
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
...
...
@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
pmeGrid[index] += EPSILON_FACTOR*
pos.w
*data[ix].x*data[iy].y*data[iz].z;
pmeGrid[index] += EPSILON_FACTOR*
charge
*data[ix].x*data[iy].y*data[iz].z;
}
}
}
...
...
@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
}
#else
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta) {
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
// Compute the charge on a grid point.
...
...
@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
...
...
@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
real bz = pmeBsplineModuliZ[kz];
real2 grid = pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
}
#endif
}
}
...
...
@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
...
...
@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
...
...
@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
{
#ifndef USE_LJPME
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
#
endif
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
#
ifdef
USE_PME_STREAM
#
if
def
ined
(
USE_PME_STREAM
)
&&
!defined
(
USE_LJPME
)
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
#
else
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
...
...
@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
...
@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
}
}
real4
totalForce
=
forceBuffers[atom]
;
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
real
q
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
real
q
=
pos.w*EPSILON_FACTOR
;
#
endif
totalForce.x
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecX.x
)
;
totalForce.y
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y
)
;
totalForce.z
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z
)
;
...
...
tests/TestDispersionPME.h
View file @
5ef1d4eb
...
...
@@ -222,6 +222,71 @@ void testPMEParameters() {
force
->
getLJPMEParametersInContext
(
context2
,
alpha
,
gridx
,
gridy
,
gridz
);
}
void
testCoulombAndLJ
()
{
// Create a cloud of random particles.
const
int
numParticles
=
200
;
const
double
boxWidth
=
5.0
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxWidth
,
0
,
0
),
Vec3
(
0
,
boxWidth
,
0
),
Vec3
(
0
,
0
,
boxWidth
));
NonbondedForce
*
force
=
new
NonbondedForce
();
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
double
>
charge
(
numParticles
),
sigma
(
numParticles
),
epsilon
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
charge
[
i
]
=
-
1.0
+
i
*
2.0
/
(
numParticles
-
1
);
sigma
[
i
]
=
0.1
+
0.2
*
genrand_real2
(
sfmt
);
epsilon
[
i
]
=
genrand_real2
(
sfmt
);
force
->
addParticle
(
charge
[
i
],
1.0
,
0.0
);
while
(
true
)
{
Vec3
pos
=
Vec3
(
boxWidth
*
genrand_real2
(
sfmt
),
boxWidth
*
genrand_real2
(
sfmt
),
boxWidth
*
genrand_real2
(
sfmt
));
double
minDist
=
boxWidth
;
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
Vec3
delta
=
pos
-
positions
[
j
];
minDist
=
min
(
minDist
,
sqrt
(
delta
.
dot
(
delta
)));
}
if
(
minDist
>
0.1
)
{
positions
[
i
]
=
pos
;
break
;
}
}
}
force
->
setNonbondedMethod
(
NonbondedForce
::
LJPME
);
// Compute forces and energy with only Coulomb interactions.
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Now repeat with only LJ interactions.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
setParticleParameters
(
i
,
0.0
,
sigma
[
i
],
epsilon
[
i
]);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state2
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Finally compute with both Coulomb and LJ.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
setParticleParameters
(
i
,
charge
[
i
],
sigma
[
i
],
epsilon
[
i
]);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state3
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Make sure the results agree.
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
()
+
state2
.
getPotentialEnergy
(),
state3
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
]
+
state2
.
getForces
()[
i
],
state3
.
getForces
()[
i
],
1e-5
);
}
void
make_waterbox
(
int
natoms
,
double
boxEdgeLength
,
NonbondedForce
*
forceField
,
vector
<
Vec3
>
&
positions
,
vector
<
double
>&
eps
,
vector
<
double
>&
sig
,
vector
<
pair
<
int
,
int
>
>&
bonds
,
System
&
system
,
bool
do_electrostatics
)
{
const
int
RESSIZE
=
3
;
...
...
@@ -1862,6 +1927,7 @@ int main(int argc, char* argv[]) {
testConvergence
();
testErrorTolerance
();
testPMEParameters
();
testCoulombAndLJ
();
testWater2DpmeEnergiesForcesNoExclusions
();
testWater2DpmeEnergiesForcesWithExclusions
();
testWater125DpmeVsLongCutoffNoExclusions
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment