Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7c6f0b4b
"wrappers/vscode:/vscode.git/clone" did not exist on "09611266c8a1fd0f90ec0926d7e65fddc3ebeaec"
Commit
7c6f0b4b
authored
Jun 26, 2018
by
Peter Eastman
Browse files
CUDA implementation of parameter offsets for NonbondedForce
parent
7ebaa816
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
251 additions
and
69 deletions
+251
-69
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+12
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+160
-65
platforms/cuda/src/kernels/nonbondedParameters.cu
platforms/cuda/src/kernels/nonbondedParameters.cu
+63
-0
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+15
-1
No files found.
platforms/cuda/include/CudaKernels.h
View file @
7c6f0b4b
...
...
@@ -609,7 +609,7 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
,
usePmeStream
(
false
)
{
}
~
CudaCalcNonbondedForceKernel
();
/**
...
...
@@ -678,6 +678,13 @@ private:
CudaArray
charges
;
CudaArray
sigmaEpsilon
;
CudaArray
exceptionParams
;
CudaArray
baseParticleParams
;
CudaArray
baseExceptionParams
;
CudaArray
particleParamOffsets
;
CudaArray
exceptionParamOffsets
;
CudaArray
particleOffsetIndices
;
CudaArray
exceptionOffsetIndices
;
CudaArray
globalParams
;
CudaArray
cosSinSums
;
CudaArray
directPmeGrid
;
CudaArray
reciprocalPmeGrid
;
...
...
@@ -701,6 +708,7 @@ private:
CudaFFT3D
*
dispersionFft
;
cufftHandle
dispersionFftForward
;
cufftHandle
dispersionFftBackward
;
CUfunction
computeParamsKernel
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
pmeGridIndexKernel
;
...
...
@@ -716,11 +724,13 @@ private:
CUfunction
pmeInterpolateForceKernel
;
CUfunction
pmeInterpolateDispersionForceKernel
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
string
>
paramNames
;
std
::
vector
<
double
>
paramValues
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
,
usePosqCharges
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
,
usePosqCharges
,
recomputeParams
,
hasOffsets
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
};
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
7c6f0b4b
...
...
@@ -1579,24 +1579,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
vector<double> chargeVec(cu.getPaddedNumAtoms(), 0.0);
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<float4> baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
chargeVec[i] = charge;
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = make_float2(sig, eps);
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
...
...
@@ -1639,6 +1630,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
map<string, string> paramsDefines;
if (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_OFFSETS"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
...
...
@@ -1648,7 +1642,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
// Create the reciprocal space kernels.
...
...
@@ -1690,10 +1687,16 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (doLJPME)
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cu.doubleToString(pow(dispersionAlpha, 6)/3.0);
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (!cu.getPlatformData().disablePmeStream && string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
...
...
@@ -1934,29 +1937,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize<float4>(cu, cu.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec);
map<string, string> replacements;
if (usePosqCharges) {
cu.setCharges(chargeVec);
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVec);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVec[i];
charges.upload(c);
}
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
if (hasLJ) {
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon.upload(sigmaEpsilonVector);
if (hasLJ) {
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
...
...
@@ -1971,33 +1967,151 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
paramsDefines["HAS_EXCEPTIONS"] = "1";
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams.initialize<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions);
baseExceptionParams.initialize<float4>(cu, numExceptions, "baseExceptionParams");
vector<float4> baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
e
xceptionParamsVec
tor
[i] = make_float4(
(float) (ONE_4PI_EPS0*
chargeProd
)
,
(float) sigma, (float) (4.0*
epsilon
)
, 0
.0f
);
baseE
xceptionParamsVec[i] = make_float4(chargeProd,
sigma,
epsilon, 0);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
e
xceptionParams.upload(
e
xceptionParamsVec
tor
);
baseE
xceptionParams.upload(
baseE
xceptionParamsVec);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
// Initialize parameter offsets.
vector<vector<float4> > particleOffsetVec(force.getNumParticles());
vector<vector<float4> > exceptionOffsetVec(force.getNumExceptions());
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
particleOffsetVec[particle].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[exception].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
exceptionParamOffsets.initialize<float4>(cu, max(force.getNumExceptionParameterOffsets(), 1), "exceptionParamOffsets");
particleOffsetIndices.initialize<int>(cu, force.getNumParticles()+1, "particleOffsetIndices");
exceptionOffsetIndices.initialize<int>(cu, force.getNumExceptions()+1, "exceptionOffsetIndices");
vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector<float4> p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) {
particleOffsetIndicesVec.push_back(p.size());
for (int j = 0; j < particleOffsetVec[i].size(); j++)
p.push_back(particleOffsetVec[i][j]);
}
particleOffsetIndicesVec.push_back(p.size());
for (int i = 0; i < exceptionOffsetVec.size(); i++) {
exceptionOffsetIndicesVec.push_back(e.size());
for (int j = 0; j < exceptionOffsetVec[i].size(); j++)
e.push_back(exceptionOffsetVec[i][j]);
}
exceptionOffsetIndicesVec.push_back(e.size());
if (force.getNumParticleParameterOffsets() > 0) {
particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec);
}
if (force.getNumExceptionParameterOffsets() > 0) {
exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
}
globalParams.initialize(cu, max((int) paramValues.size(), 1), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "globalParams");
recomputeParams = true;
// Initialize the kernel for updating parameters.
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters");
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters.
bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]);
if (value != paramValues[i]) {
paramValues[i] = value;;
paramChanged = true;
}
}
if (paramChanged) {
recomputeParams = true;
if (cu.getUseDoublePrecision())
globalParams.upload(paramValues);
else {
vector<float> v(paramValues.size());
for (int i = 0; i < v.size(); i++)
v[i] = paramValues[i];
globalParams.upload(v);
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) {
bool computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms();
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
if (exceptionParams.isInitialized()) {
int numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, ¶msArgs[0], cu.getPaddedNumAtoms());
if (usePmeStream) {
cuEventRecord(pmeSyncEvent, cu.getCurrentStream());
cuStreamWaitEvent(pmeStream, pmeSyncEvent, 0);
}
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
// Do reciprocal space calculations.
if (cosSinSums.isInitialized() && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
if (directPmeGrid.isInitialized()&& includeReciprocal) {
if (directPmeGrid.isInitialized()
&& includeReciprocal) {
if (usePmeStream)
cu.setCurrentStream(pmeStream);
...
...
@@ -2153,7 +2267,6 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
...
...
@@ -2194,60 +2307,42 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the per-particle parameters.
vector<double> chargeVector(cu.getPaddedNumAtoms(), 0.0);
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms());
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
vector<float4> baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
const vector<int>& order = cu.getAtomIndex();
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
chargeVector[i] = charge;
double sig = (0.5*sigma);
double eps = (2.0*sqrt(epsilon));
sigmaEpsilonVector[i] = make_float2((float) sig, (float) eps);
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
sumSquaredCharges += charge*charge;
}
for (int i = force.getNumParticles(); i < cu.getPaddedNumAtoms(); i++)
sigmaEpsilonVector[i] = make_float2(0,0);
if (usePosqCharges)
cu.setCharges(chargeVector);
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVector);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVector[i];
charges.upload(c);
}
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
}
sigmaEpsilon.upload(sigmaEpsilonVector
);
baseParticleParams.upload(baseParticleParamVec
);
// Record the exceptions.
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<float4>
e
xceptionParamsVec
tor
(numExceptions);
vector<float4>
baseE
xceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
e
xceptionParamsVec
tor
[i] = make_float4(
(float) (ONE_4PI_EPS0*
chargeProd
)
,
(float) sigma, (float) (4.0*
epsilon
)
, 0
.0f
);
baseE
xceptionParamsVec[i] = make_float4(chargeProd,
sigma,
epsilon, 0);
}
e
xceptionParams.upload(
e
xceptionParamsVec
tor
);
baseE
xceptionParams.upload(
baseE
xceptionParamsVec);
}
// Compute other values.
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (nonbondedMethod == LJPME)
ewaldSelfEnergy += (cu.getContextIndex() == 0 ? pow(dispersionAlpha, 6)*sumSquaredC6/12.0 : 0);
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
recomputeParams = true;
}
void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
...
...
platforms/cuda/src/kernels/nonbondedParameters.cu
0 → 100644
View file @
7c6f0b4b
/**
* Compute the nonbonded parameters for particles and exceptions.
*/
extern
"C"
__global__
void
computeParameters
(
mixed
*
__restrict__
energyBuffer
,
bool
includeSelfEnergy
,
real
*
__restrict__
globalParams
,
int
numAtoms
,
const
float4
*
__restrict__
baseParticleParams
,
real4
*
__restrict__
posq
,
real
*
__restrict__
charge
,
float2
*
__restrict__
sigmaEpsilon
,
float4
*
__restrict__
particleParamOffsets
,
int
*
__restrict__
particleOffsetIndices
#ifdef HAS_EXCEPTIONS
,
int
numExceptions
,
const
float4
*
__restrict__
baseExceptionParams
,
float4
*
__restrict__
exceptionParams
,
float4
*
__restrict__
exceptionParamOffsets
,
int
*
__restrict__
exceptionOffsetIndices
#endif
)
{
mixed
energy
=
0
;
// Compute particle parameters.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
float4
params
=
baseParticleParams
[
i
];
#ifdef HAS_OFFSETS
int
start
=
particleOffsetIndices
[
i
],
end
=
particleOffsetIndices
[
i
+
1
];
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
float4
offset
=
particleParamOffsets
[
j
];
real
value
=
globalParams
[(
int
)
offset
.
w
];
params
.
x
+=
value
*
offset
.
x
;
params
.
y
+=
value
*
offset
.
y
;
params
.
z
+=
value
*
offset
.
z
;
}
#endif
#ifdef USE_POSQ_CHARGES
posq
[
i
].
w
=
params
.
x
;
#else
charge
[
i
]
=
params
.
x
;
#endif
sigmaEpsilon
[
i
]
=
make_float2
(
0.5
f
*
params
.
y
,
2
*
SQRT
(
params
.
z
));
#ifdef INCLUDE_EWALD
energy
-=
EWALD_SELF_ENERGY_SCALE
*
params
.
x
*
params
.
x
;
#endif
#ifdef INCLUDE_LJPME
real
sig3
=
params
.
y
*
params
.
y
*
params
.
y
;
energy
+=
LJPME_SELF_ENERGY_SCALE
*
sig3
*
sig3
*
params
.
z
;
#endif
}
// Compute exception parameters.
#ifdef HAS_EXCEPTIONS
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numExceptions
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
float4
params
=
baseExceptionParams
[
i
];
#ifdef HAS_OFFSETS
int
start
=
exceptionOffsetIndices
[
i
],
end
=
exceptionOffsetIndices
[
i
+
1
];
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
float4
offset
=
exceptionParamOffsets
[
j
];
real
value
=
globalParams
[(
int
)
offset
.
w
];
params
.
x
+=
value
*
offset
.
x
;
params
.
y
+=
value
*
offset
.
y
;
params
.
z
+=
value
*
offset
.
z
;
}
#endif
exceptionParams
[
i
]
=
make_float4
((
float
)
(
138.935456
f
*
params
.
x
),
(
float
)
params
.
y
,
(
float
)
(
4
*
params
.
z
),
0
);
}
#endif
if
(
includeSelfEnergy
)
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
platforms/opencl/include/OpenCLKernels.h
View file @
7c6f0b4b
...
...
@@ -586,7 +586,7 @@ private:
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
,
usePmeQueue
(
false
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
7c6f0b4b
...
...
@@ -1678,7 +1678,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cl.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy
+
= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
ewaldSelfEnergy
-
= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cl.doubleToString(pow(dispersionAlpha, 6)/3.0);
...
...
@@ -2164,7 +2164,13 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (recomputeParams || hasOffsets) {
computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal);
cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms());
if (usePmeQueue) {
vector<cl::Event> events(1);
cl.getQueue().enqueueMarker(&events[0]);
pmeQueue.enqueueWaitForEvents(events);
}
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
// Do reciprocal space calculations.
...
...
@@ -2465,6 +2471,14 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Compute other values.
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cl.invalidateMolecules(info);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment