Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7c6f0b4b
Commit
7c6f0b4b
authored
Jun 26, 2018
by
Peter Eastman
Browse files
CUDA implementation of parameter offsets for NonbondedForce
parent
7ebaa816
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
251 additions
and
69 deletions
+251
-69
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+12
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+160
-65
platforms/cuda/src/kernels/nonbondedParameters.cu
platforms/cuda/src/kernels/nonbondedParameters.cu
+63
-0
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+15
-1
No files found.
platforms/cuda/include/CudaKernels.h
View file @
7c6f0b4b
...
@@ -609,7 +609,7 @@ private:
...
@@ -609,7 +609,7 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
,
usePmeStream
(
false
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -678,6 +678,13 @@ private:
...
@@ -678,6 +678,13 @@ private:
CudaArray
charges
;
CudaArray
charges
;
CudaArray
sigmaEpsilon
;
CudaArray
sigmaEpsilon
;
CudaArray
exceptionParams
;
CudaArray
exceptionParams
;
CudaArray
baseParticleParams
;
CudaArray
baseExceptionParams
;
CudaArray
particleParamOffsets
;
CudaArray
exceptionParamOffsets
;
CudaArray
particleOffsetIndices
;
CudaArray
exceptionOffsetIndices
;
CudaArray
globalParams
;
CudaArray
cosSinSums
;
CudaArray
cosSinSums
;
CudaArray
directPmeGrid
;
CudaArray
directPmeGrid
;
CudaArray
reciprocalPmeGrid
;
CudaArray
reciprocalPmeGrid
;
...
@@ -701,6 +708,7 @@ private:
...
@@ -701,6 +708,7 @@ private:
CudaFFT3D
*
dispersionFft
;
CudaFFT3D
*
dispersionFft
;
cufftHandle
dispersionFftForward
;
cufftHandle
dispersionFftForward
;
cufftHandle
dispersionFftBackward
;
cufftHandle
dispersionFftBackward
;
CUfunction
computeParamsKernel
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
pmeGridIndexKernel
;
CUfunction
pmeGridIndexKernel
;
...
@@ -716,11 +724,13 @@ private:
...
@@ -716,11 +724,13 @@ private:
CUfunction
pmeInterpolateForceKernel
;
CUfunction
pmeInterpolateForceKernel
;
CUfunction
pmeInterpolateDispersionForceKernel
;
CUfunction
pmeInterpolateDispersionForceKernel
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
string
>
paramNames
;
std
::
vector
<
double
>
paramValues
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
interpolateForceThreads
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
,
usePosqCharges
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
,
usePosqCharges
,
recomputeParams
,
hasOffsets
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
7c6f0b4b
...
@@ -1579,24 +1579,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1579,24 +1579,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
int numParticles = force.getNumParticles();
vector<double> chargeVec(cu.getPaddedNumAtoms(), 0.0);
vector<float4> baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<vector<int> > exclusionList(numParticles);
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
hasCoulomb = false;
hasCoulomb = false;
hasLJ = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
force.getParticleParameters(i, charge, sigma, epsilon);
chargeVec[i] = charge;
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = make_float2(sig, eps);
exclusionList[i].push_back(i);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
if (charge != 0.0)
hasCoulomb = true;
hasCoulomb = true;
if (epsilon != 0.0)
if (epsilon != 0.0)
...
@@ -1639,6 +1630,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1639,6 +1630,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
dispersionCoefficient = 0.0;
dispersionCoefficient = 0.0;
alpha = 0;
alpha = 0;
ewaldSelfEnergy = 0.0;
ewaldSelfEnergy = 0.0;
map<string, string> paramsDefines;
if (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_OFFSETS"] = "1";
if (nonbondedMethod == Ewald) {
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
// Compute the Ewald parameters.
...
@@ -1648,7 +1642,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1648,7 +1642,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["USE_EWALD"] = "1";
if (cu.getContextIndex() == 0) {
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
// Create the reciprocal space kernels.
// Create the reciprocal space kernels.
...
@@ -1690,10 +1687,16 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1690,10 +1687,16 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (doLJPME)
if (doLJPME)
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (cu.getContextIndex() == 0) {
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
paramsDefines["INCLUDE_EWALD"] = "1";
if (doLJPME)
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cu.doubleToString(pow(dispersionAlpha, 6)/3.0);
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
char deviceName[100];
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (!cu.getPlatformData().disablePmeStream && string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
usePmeStream = (!cu.getPlatformData().disablePmeStream && string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
...
@@ -1934,29 +1937,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1934,29 +1937,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize<float4>(cu, cu.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec);
map<string, string> replacements;
map<string, string> replacements;
if (usePosqCharges) {
if (usePosqCharges) {
cu.setCharges(chargeVec);
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
replacements["CHARGE2"] = "posq2.w";
paramsDefines["USE_POSQ_CHARGES"] = "1";
}
}
else {
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVec);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVec[i];
charges.upload(c);
}
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
replacements["CHARGE2"] = prefix+"charge2";
}
}
if (hasCoulomb)
if (hasCoulomb)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) {
if (hasLJ) {
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon.upload(sigmaEpsilonVector);
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
...
@@ -1971,33 +1967,151 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1971,33 +1967,151 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
if (numExceptions > 0) {
paramsDefines["HAS_EXCEPTIONS"] = "1";
exceptionAtoms.resize(numExceptions);
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams.initialize<float4>(cu, numExceptions, "exceptionParams");
exceptionParams.initialize<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions);
baseExceptionParams.initialize<float4>(cu, numExceptions, "baseExceptionParams");
vector<float4> baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
e
xceptionParamsVec
tor
[i] = make_float4(
(float) (ONE_4PI_EPS0*
chargeProd
)
,
(float) sigma, (float) (4.0*
epsilon
)
, 0
.0f
);
baseE
xceptionParamsVec[i] = make_float4(chargeProd,
sigma,
epsilon, 0);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
}
e
xceptionParams.upload(
e
xceptionParamsVec
tor
);
baseE
xceptionParams.upload(
baseE
xceptionParamsVec);
map<string, string> replacements;
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
}
// Initialize parameter offsets.
vector<vector<float4> > particleOffsetVec(force.getNumParticles());
vector<vector<float4> > exceptionOffsetVec(force.getNumExceptions());
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
particleOffsetVec[particle].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[exception].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
exceptionParamOffsets.initialize<float4>(cu, max(force.getNumExceptionParameterOffsets(), 1), "exceptionParamOffsets");
particleOffsetIndices.initialize<int>(cu, force.getNumParticles()+1, "particleOffsetIndices");
exceptionOffsetIndices.initialize<int>(cu, force.getNumExceptions()+1, "exceptionOffsetIndices");
vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector<float4> p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) {
particleOffsetIndicesVec.push_back(p.size());
for (int j = 0; j < particleOffsetVec[i].size(); j++)
p.push_back(particleOffsetVec[i][j]);
}
particleOffsetIndicesVec.push_back(p.size());
for (int i = 0; i < exceptionOffsetVec.size(); i++) {
exceptionOffsetIndicesVec.push_back(e.size());
for (int j = 0; j < exceptionOffsetVec[i].size(); j++)
e.push_back(exceptionOffsetVec[i][j]);
}
exceptionOffsetIndicesVec.push_back(e.size());
if (force.getNumParticleParameterOffsets() > 0) {
particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec);
}
if (force.getNumExceptionParameterOffsets() > 0) {
exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
}
globalParams.initialize(cu, max((int) paramValues.size(), 1), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "globalParams");
recomputeParams = true;
// Initialize the kernel for updating parameters.
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters");
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
}
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters.
bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]);
if (value != paramValues[i]) {
paramValues[i] = value;;
paramChanged = true;
}
}
if (paramChanged) {
recomputeParams = true;
if (cu.getUseDoublePrecision())
globalParams.upload(paramValues);
else {
vector<float> v(paramValues.size());
for (int i = 0; i < v.size(); i++)
v[i] = paramValues[i];
globalParams.upload(v);
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) {
bool computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms();
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
if (exceptionParams.isInitialized()) {
int numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, ¶msArgs[0], cu.getPaddedNumAtoms());
if (usePmeStream) {
cuEventRecord(pmeSyncEvent, cu.getCurrentStream());
cuStreamWaitEvent(pmeStream, pmeSyncEvent, 0);
}
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
// Do reciprocal space calculations.
if (cosSinSums.isInitialized() && includeReciprocal) {
if (cosSinSums.isInitialized() && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
}
if (directPmeGrid.isInitialized()&& includeReciprocal) {
if (directPmeGrid.isInitialized()
&& includeReciprocal) {
if (usePmeStream)
if (usePmeStream)
cu.setCurrentStream(pmeStream);
cu.setCurrentStream(pmeStream);
...
@@ -2153,7 +2267,6 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2153,7 +2267,6 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
}
}
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) {
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
...
@@ -2194,60 +2307,42 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2194,60 +2307,42 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the per-particle parameters.
// Record the per-particle parameters.
vector<double> chargeVector(cu.getPaddedNumAtoms(), 0.0);
vector<float4> baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms());
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
const vector<int>& order = cu.getAtomIndex();
const vector<int>& order = cu.getAtomIndex();
for (int i = 0; i < force.getNumParticles(); i++) {
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
force.getParticleParameters(i, charge, sigma, epsilon);
chargeVector[i] = charge;
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
double sig = (0.5*sigma);
double eps = (2.0*sqrt(epsilon));
sigmaEpsilonVector[i] = make_float2((float) sig, (float) eps);
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
sumSquaredCharges += charge*charge;
}
for (int i = force.getNumParticles(); i < cu.getPaddedNumAtoms(); i++)
sigmaEpsilonVector[i] = make_float2(0,0);
if (usePosqCharges)
cu.setCharges(chargeVector);
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVector);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVector[i];
charges.upload(c);
}
}
}
sigmaEpsilon.upload(sigmaEpsilonVector
);
baseParticleParams.upload(baseParticleParamVec
);
// Record the exceptions.
// Record the exceptions.
if (numExceptions > 0) {
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<float4>
e
xceptionParamsVec
tor
(numExceptions);
vector<float4>
baseE
xceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
e
xceptionParamsVec
tor
[i] = make_float4(
(float) (ONE_4PI_EPS0*
chargeProd
)
,
(float) sigma, (float) (4.0*
epsilon
)
, 0
.0f
);
baseE
xceptionParamsVec[i] = make_float4(chargeProd,
sigma,
epsilon, 0);
}
}
e
xceptionParams.upload(
e
xceptionParamsVec
tor
);
baseE
xceptionParams.upload(
baseE
xceptionParamsVec);
}
}
// Compute other values.
// Compute other values.
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME)
ewaldSelfEnergy = 0.0;
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
if (nonbondedMethod == LJPME)
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy += (cu.getContextIndex() == 0 ? pow(dispersionAlpha, 6)*sumSquaredC6/12.0 : 0);
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
cu.invalidateMolecules();
recomputeParams = true;
}
}
void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
...
...
platforms/cuda/src/kernels/nonbondedParameters.cu
0 → 100644
View file @
7c6f0b4b
/**
* Compute the nonbonded parameters for particles and exceptions.
*/
extern
"C"
__global__
void
computeParameters
(
mixed
*
__restrict__
energyBuffer
,
bool
includeSelfEnergy
,
real
*
__restrict__
globalParams
,
int
numAtoms
,
const
float4
*
__restrict__
baseParticleParams
,
real4
*
__restrict__
posq
,
real
*
__restrict__
charge
,
float2
*
__restrict__
sigmaEpsilon
,
float4
*
__restrict__
particleParamOffsets
,
int
*
__restrict__
particleOffsetIndices
#ifdef HAS_EXCEPTIONS
,
int
numExceptions
,
const
float4
*
__restrict__
baseExceptionParams
,
float4
*
__restrict__
exceptionParams
,
float4
*
__restrict__
exceptionParamOffsets
,
int
*
__restrict__
exceptionOffsetIndices
#endif
)
{
mixed
energy
=
0
;
// Compute particle parameters.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
float4
params
=
baseParticleParams
[
i
];
#ifdef HAS_OFFSETS
int
start
=
particleOffsetIndices
[
i
],
end
=
particleOffsetIndices
[
i
+
1
];
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
float4
offset
=
particleParamOffsets
[
j
];
real
value
=
globalParams
[(
int
)
offset
.
w
];
params
.
x
+=
value
*
offset
.
x
;
params
.
y
+=
value
*
offset
.
y
;
params
.
z
+=
value
*
offset
.
z
;
}
#endif
#ifdef USE_POSQ_CHARGES
posq
[
i
].
w
=
params
.
x
;
#else
charge
[
i
]
=
params
.
x
;
#endif
sigmaEpsilon
[
i
]
=
make_float2
(
0.5
f
*
params
.
y
,
2
*
SQRT
(
params
.
z
));
#ifdef INCLUDE_EWALD
energy
-=
EWALD_SELF_ENERGY_SCALE
*
params
.
x
*
params
.
x
;
#endif
#ifdef INCLUDE_LJPME
real
sig3
=
params
.
y
*
params
.
y
*
params
.
y
;
energy
+=
LJPME_SELF_ENERGY_SCALE
*
sig3
*
sig3
*
params
.
z
;
#endif
}
// Compute exception parameters.
#ifdef HAS_EXCEPTIONS
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numExceptions
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
float4
params
=
baseExceptionParams
[
i
];
#ifdef HAS_OFFSETS
int
start
=
exceptionOffsetIndices
[
i
],
end
=
exceptionOffsetIndices
[
i
+
1
];
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
float4
offset
=
exceptionParamOffsets
[
j
];
real
value
=
globalParams
[(
int
)
offset
.
w
];
params
.
x
+=
value
*
offset
.
x
;
params
.
y
+=
value
*
offset
.
y
;
params
.
z
+=
value
*
offset
.
z
;
}
#endif
exceptionParams
[
i
]
=
make_float4
((
float
)
(
138.935456
f
*
params
.
x
),
(
float
)
params
.
y
,
(
float
)
(
4
*
params
.
z
),
0
);
}
#endif
if
(
includeSelfEnergy
)
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
platforms/opencl/include/OpenCLKernels.h
View file @
7c6f0b4b
...
@@ -586,7 +586,7 @@ private:
...
@@ -586,7 +586,7 @@ private:
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
,
usePmeQueue
(
false
)
{
}
}
~
OpenCLCalcNonbondedForceKernel
();
~
OpenCLCalcNonbondedForceKernel
();
/**
/**
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
7c6f0b4b
...
@@ -1678,7 +1678,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1678,7 +1678,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cl.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cl.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy
+
= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
ewaldSelfEnergy
-
= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cl.doubleToString(pow(dispersionAlpha, 6)/3.0);
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cl.doubleToString(pow(dispersionAlpha, 6)/3.0);
...
@@ -2164,7 +2164,13 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2164,7 +2164,13 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (recomputeParams || hasOffsets) {
if (recomputeParams || hasOffsets) {
computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal);
computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal);
cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms());
cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms());
if (usePmeQueue) {
vector<cl::Event> events(1);
cl.getQueue().enqueueMarker(&events[0]);
pmeQueue.enqueueWaitForEvents(events);
}
energy = 0.0; // The Ewald self energy was computed in the kernel.
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
}
// Do reciprocal space calculations.
// Do reciprocal space calculations.
...
@@ -2465,6 +2471,14 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -2465,6 +2471,14 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Compute other values.
// Compute other values.
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cl.invalidateMolecules(info);
cl.invalidateMolecules(info);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment