Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7c6f0b4b
Commit
7c6f0b4b
authored
Jun 26, 2018
by
Peter Eastman
Browse files
CUDA implementation of parameter offsets for NonbondedForce
parent
7ebaa816
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
251 additions
and
69 deletions
+251
-69
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+12
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+160
-65
platforms/cuda/src/kernels/nonbondedParameters.cu
platforms/cuda/src/kernels/nonbondedParameters.cu
+63
-0
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+15
-1
No files found.
platforms/cuda/include/CudaKernels.h
View file @
7c6f0b4b
...
...
@@ -609,7 +609,7 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
,
usePmeStream
(
false
)
{
}
~
CudaCalcNonbondedForceKernel
();
/**
...
...
@@ -678,6 +678,13 @@ private:
CudaArray
charges
;
CudaArray
sigmaEpsilon
;
CudaArray
exceptionParams
;
CudaArray
baseParticleParams
;
CudaArray
baseExceptionParams
;
CudaArray
particleParamOffsets
;
CudaArray
exceptionParamOffsets
;
CudaArray
particleOffsetIndices
;
CudaArray
exceptionOffsetIndices
;
CudaArray
globalParams
;
CudaArray
cosSinSums
;
CudaArray
directPmeGrid
;
CudaArray
reciprocalPmeGrid
;
...
...
@@ -701,6 +708,7 @@ private:
CudaFFT3D
*
dispersionFft
;
cufftHandle
dispersionFftForward
;
cufftHandle
dispersionFftBackward
;
CUfunction
computeParamsKernel
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
pmeGridIndexKernel
;
...
...
@@ -716,11 +724,13 @@ private:
CUfunction
pmeInterpolateForceKernel
;
CUfunction
pmeInterpolateDispersionForceKernel
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
string
>
paramNames
;
std
::
vector
<
double
>
paramValues
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
,
usePosqCharges
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
,
usePosqCharges
,
recomputeParams
,
hasOffsets
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
};
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
7c6f0b4b
...
...
@@ -1579,24 +1579,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
vector<double> chargeVec(cu.getPaddedNumAtoms(), 0.0);
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<float4> baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
chargeVec[i] = charge;
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = make_float2(sig, eps);
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
...
...
@@ -1639,6 +1630,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
map<string, string> paramsDefines;
if (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_OFFSETS"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
...
...
@@ -1648,7 +1642,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
// Create the reciprocal space kernels.
...
...
@@ -1690,10 +1687,16 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (doLJPME)
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cu.doubleToString(pow(dispersionAlpha, 6)/3.0);
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (!cu.getPlatformData().disablePmeStream && string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
...
...
@@ -1934,29 +1937,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize<float4>(cu, cu.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec);
map<string, string> replacements;
if (usePosqCharges) {
cu.setCharges(chargeVec);
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVec);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVec[i];
charges.upload(c);
}
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) {
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon.upload(sigmaEpsilonVector);
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
...
...
@@ -1971,33 +1967,151 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
paramsDefines["HAS_EXCEPTIONS"] = "1";
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams.initialize<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions);
baseExceptionParams.initialize<float4>(cu, numExceptions, "baseExceptionParams");
vector<float4> baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
e
xceptionParamsVec
tor
[i] = make_float4(
(float) (ONE_4PI_EPS0*
chargeProd
)
,
(float) sigma, (float) (4.0*
epsilon
)
, 0
.0f
);
baseE
xceptionParamsVec[i] = make_float4(chargeProd,
sigma,
epsilon, 0);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
e
xceptionParams.upload(
e
xceptionParamsVec
tor
);
baseE
xceptionParams.upload(
baseE
xceptionParamsVec);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
// Initialize parameter offsets.
vector<vector<float4> > particleOffsetVec(force.getNumParticles());
vector<vector<float4> > exceptionOffsetVec(force.getNumExceptions());
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
particleOffsetVec[particle].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[exception].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
exceptionParamOffsets.initialize<float4>(cu, max(force.getNumExceptionParameterOffsets(), 1), "exceptionParamOffsets");
particleOffsetIndices.initialize<int>(cu, force.getNumParticles()+1, "particleOffsetIndices");
exceptionOffsetIndices.initialize<int>(cu, force.getNumExceptions()+1, "exceptionOffsetIndices");
vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector<float4> p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) {
particleOffsetIndicesVec.push_back(p.size());
for (int j = 0; j < particleOffsetVec[i].size(); j++)
p.push_back(particleOffsetVec[i][j]);
}
particleOffsetIndicesVec.push_back(p.size());
for (int i = 0; i < exceptionOffsetVec.size(); i++) {
exceptionOffsetIndicesVec.push_back(e.size());
for (int j = 0; j < exceptionOffsetVec[i].size(); j++)
e.push_back(exceptionOffsetVec[i][j]);
}
exceptionOffsetIndicesVec.push_back(e.size());
if (force.getNumParticleParameterOffsets() > 0) {
particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec);
}
if (force.getNumExceptionParameterOffsets() > 0) {
exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
}
globalParams.initialize(cu, max((int) paramValues.size(), 1), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "globalParams");
recomputeParams = true;
// Initialize the kernel for updating parameters.
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters");
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters.
bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]);
if (value != paramValues[i]) {
paramValues[i] = value;;
paramChanged = true;
}
}
if (paramChanged) {
recomputeParams = true;
if (cu.getUseDoublePrecision())
globalParams.upload(paramValues);
else {
vector<float> v(paramValues.size());
for (int i = 0; i < v.size(); i++)
v[i] = paramValues[i];
globalParams.upload(v);
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) {
bool computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms();
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
if (exceptionParams.isInitialized()) {
int numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, ¶msArgs[0], cu.getPaddedNumAtoms());
if (usePmeStream) {
cuEventRecord(pmeSyncEvent, cu.getCurrentStream());
cuStreamWaitEvent(pmeStream, pmeSyncEvent, 0);
}
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
// Do reciprocal space calculations.
if (cosSinSums.isInitialized() && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
if (directPmeGrid.isInitialized()&& includeReciprocal) {
if (directPmeGrid.isInitialized()
&& includeReciprocal) {
if (usePmeStream)
cu.setCurrentStream(pmeStream);
...
...
@@ -2153,7 +2267,6 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
...
...
@@ -2194,60 +2307,42 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the per-particle parameters.
vector<double> chargeVector(cu.getPaddedNumAtoms(), 0.0);
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms());
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
vector<float4> baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
const vector<int>& order = cu.getAtomIndex();
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
chargeVector[i] = charge;
double sig = (0.5*sigma);
double eps = (2.0*sqrt(epsilon));
sigmaEpsilonVector[i] = make_float2((float) sig, (float) eps);
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
sumSquaredCharges += charge*charge;
}
for (int i = force.getNumParticles(); i < cu.getPaddedNumAtoms(); i++)
sigmaEpsilonVector[i] = make_float2(0,0);
if (usePosqCharges)
cu.setCharges(chargeVector);
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVector);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVector[i];
charges.upload(c);
}
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
}
sigmaEpsilon.upload(sigmaEpsilonVector
);
baseParticleParams.upload(baseParticleParamVec
);
// Record the exceptions.
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<float4>
e
xceptionParamsVec
tor
(numExceptions);
vector<float4>
baseE
xceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
e
xceptionParamsVec
tor
[i] = make_float4(
(float) (ONE_4PI_EPS0*
chargeProd
)
,
(float) sigma, (float) (4.0*
epsilon
)
, 0
.0f
);
baseE
xceptionParamsVec[i] = make_float4(chargeProd,
sigma,
epsilon, 0);
}
e
xceptionParams.upload(
e
xceptionParamsVec
tor
);
baseE
xceptionParams.upload(
baseE
xceptionParamsVec);
}
// Compute other values.
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (nonbondedMethod == LJPME)
ewaldSelfEnergy += (cu.getContextIndex() == 0 ? pow(dispersionAlpha, 6)*sumSquaredC6/12.0 : 0);
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
recomputeParams = true;
}
void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
...
...
platforms/cuda/src/kernels/nonbondedParameters.cu
0 → 100644
View file @
7c6f0b4b
/**
* Compute the nonbonded parameters for particles and exceptions.
*/
extern
"C"
__global__
void
computeParameters
(
mixed
*
__restrict__
energyBuffer
,
bool
includeSelfEnergy
,
real
*
__restrict__
globalParams
,
int
numAtoms
,
const
float4
*
__restrict__
baseParticleParams
,
real4
*
__restrict__
posq
,
real
*
__restrict__
charge
,
float2
*
__restrict__
sigmaEpsilon
,
float4
*
__restrict__
particleParamOffsets
,
int
*
__restrict__
particleOffsetIndices
#ifdef HAS_EXCEPTIONS
,
int
numExceptions
,
const
float4
*
__restrict__
baseExceptionParams
,
float4
*
__restrict__
exceptionParams
,
float4
*
__restrict__
exceptionParamOffsets
,
int
*
__restrict__
exceptionOffsetIndices
#endif
)
{
mixed
energy
=
0
;
// Compute particle parameters.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
float4
params
=
baseParticleParams
[
i
];
#ifdef HAS_OFFSETS
int
start
=
particleOffsetIndices
[
i
],
end
=
particleOffsetIndices
[
i
+
1
];
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
float4
offset
=
particleParamOffsets
[
j
];
real
value
=
globalParams
[(
int
)
offset
.
w
];
params
.
x
+=
value
*
offset
.
x
;
params
.
y
+=
value
*
offset
.
y
;
params
.
z
+=
value
*
offset
.
z
;
}
#endif
#ifdef USE_POSQ_CHARGES
posq
[
i
].
w
=
params
.
x
;
#else
charge
[
i
]
=
params
.
x
;
#endif
sigmaEpsilon
[
i
]
=
make_float2
(
0.5
f
*
params
.
y
,
2
*
SQRT
(
params
.
z
));
#ifdef INCLUDE_EWALD
energy
-=
EWALD_SELF_ENERGY_SCALE
*
params
.
x
*
params
.
x
;
#endif
#ifdef INCLUDE_LJPME
real
sig3
=
params
.
y
*
params
.
y
*
params
.
y
;
energy
+=
LJPME_SELF_ENERGY_SCALE
*
sig3
*
sig3
*
params
.
z
;
#endif
}
// Compute exception parameters.
#ifdef HAS_EXCEPTIONS
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numExceptions
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
float4
params
=
baseExceptionParams
[
i
];
#ifdef HAS_OFFSETS
int
start
=
exceptionOffsetIndices
[
i
],
end
=
exceptionOffsetIndices
[
i
+
1
];
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
float4
offset
=
exceptionParamOffsets
[
j
];
real
value
=
globalParams
[(
int
)
offset
.
w
];
params
.
x
+=
value
*
offset
.
x
;
params
.
y
+=
value
*
offset
.
y
;
params
.
z
+=
value
*
offset
.
z
;
}
#endif
exceptionParams
[
i
]
=
make_float4
((
float
)
(
138.935456
f
*
params
.
x
),
(
float
)
params
.
y
,
(
float
)
(
4
*
params
.
z
),
0
);
}
#endif
if
(
includeSelfEnergy
)
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
platforms/opencl/include/OpenCLKernels.h
View file @
7c6f0b4b
...
...
@@ -586,7 +586,7 @@ private:
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
,
usePmeQueue
(
false
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
7c6f0b4b
...
...
@@ -1678,7 +1678,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cl.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy
+
= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
ewaldSelfEnergy
-
= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cl.doubleToString(pow(dispersionAlpha, 6)/3.0);
...
...
@@ -2164,7 +2164,13 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (recomputeParams || hasOffsets) {
computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal);
cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms());
if (usePmeQueue) {
vector<cl::Event> events(1);
cl.getQueue().enqueueMarker(&events[0]);
pmeQueue.enqueueWaitForEvents(events);
}
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
// Do reciprocal space calculations.
...
...
@@ -2465,6 +2471,14 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Compute other values.
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cl.invalidateMolecules(info);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment