Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
dbcc494f
Commit
dbcc494f
authored
Jan 24, 2017
by
Peter Eastman
Browse files
Cleanup to CUDA dispersion PME code
parent
a9f65649
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
75 additions
and
170 deletions
+75
-170
openmmapi/include/openmm/internal/NonbondedForceImpl.h
openmmapi/include/openmm/internal/NonbondedForceImpl.h
+1
-1
openmmapi/src/NonbondedForceImpl.cpp
openmmapi/src/NonbondedForceImpl.cpp
+3
-4
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+6
-13
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+54
-144
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+2
-1
platforms/cuda/src/kernels/ljpme.cu
platforms/cuda/src/kernels/ljpme.cu
+9
-7
No files found.
openmmapi/include/openmm/internal/NonbondedForceImpl.h
View file @
dbcc494f
...
@@ -75,7 +75,7 @@ public:
...
@@ -75,7 +75,7 @@ public:
* This is a utility routine that calculates the values to use for alpha and grid size when using
* This is a utility routine that calculates the values to use for alpha and grid size when using
* Particle Mesh Ewald.
* Particle Mesh Ewald.
*/
*/
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
);
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
lj
);
/**
/**
* Compute the coefficient which, when divided by the periodic box volume, gives the
* Compute the coefficient which, when divided by the periodic box volume, gives the
* long range dispersion correction to the energy.
* long range dispersion correction to the energy.
...
...
openmmapi/src/NonbondedForceImpl.cpp
View file @
dbcc494f
...
@@ -151,12 +151,11 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
...
@@ -151,12 +151,11 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
kmaxz
++
;
kmaxz
++
;
}
}
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
)
{
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
lj
)
{
if
(
LJ
)
{
if
(
lj
)
force
.
getLJPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
force
.
getLJPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
else
{
else
force
.
getPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
force
.
getPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
if
(
alpha
==
0.0
)
{
if
(
alpha
==
0.0
)
{
Vec3
boxVectors
[
3
];
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
...
...
platforms/cuda/include/CudaKernels.h
View file @
dbcc494f
...
@@ -598,10 +598,9 @@ private:
...
@@ -598,10 +598,9 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
C6s
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
directDispersionPmeGrid
(
NULL
),
reciprocalDispersionPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeAtomDispersionGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeEnergyBuffer
(
NULL
),
dispersionPmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
),
dispersionPmeio
(
NULL
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -666,28 +665,22 @@ private:
...
@@ -666,28 +665,22 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
bool
hasInitializedFFT
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
C6s
;
CudaArray
*
exceptionParams
;
CudaArray
*
exceptionParams
;
CudaArray
*
cosSinSums
;
CudaArray
*
cosSinSums
;
CudaArray
*
directPmeGrid
;
CudaArray
*
directPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
directDispersionPmeGrid
;
CudaArray
*
reciprocalDispersionPmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomDispersionGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
dispersionPmeEnergyBuffer
;
CudaSort
*
sort
;
CudaSort
*
sort
;
Kernel
cpuPme
;
Kernel
cpuPme
;
Kernel
cpuDispersionPme
;
Kernel
cpuDispersionPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
PmeIO
*
dispersionPmeio
;
CUstream
pmeStream
;
CUstream
pmeStream
,
dispersionPmeStream
;
CUevent
pmeSyncEvent
;
CUevent
pmeSyncEvent
,
dispersionPmeSyncEvent
;
CudaFFT3D
*
fft
;
CudaFFT3D
*
fft
;
cufftHandle
fftForward
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
cufftHandle
fftBackward
;
...
@@ -710,7 +703,7 @@ private:
...
@@ -710,7 +703,7 @@ private:
CUfunction
pmeInterpolateDispersionForceKernel
;
CUfunction
pmeInterpolateDispersionForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
interpolateForceThreads
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
dbcc494f
...
@@ -1593,20 +1593,14 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1593,20 +1593,14 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu.setAsCurrent();
cu.setAsCurrent();
if (sigmaEpsilon != NULL)
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
delete sigmaEpsilon;
if (C6s != NULL)
delete C6s;
if (exceptionParams != NULL)
if (exceptionParams != NULL)
delete exceptionParams;
delete exceptionParams;
if (cosSinSums != NULL)
if (cosSinSums != NULL)
delete cosSinSums;
delete cosSinSums;
if (directPmeGrid != NULL)
if (directPmeGrid != NULL)
delete directPmeGrid;
delete directPmeGrid;
if (directDispersionPmeGrid != NULL)
delete directDispersionPmeGrid;
if (reciprocalPmeGrid != NULL)
if (reciprocalPmeGrid != NULL)
delete reciprocalPmeGrid;
delete reciprocalPmeGrid;
if (reciprocalDispersionPmeGrid != NULL)
delete reciprocalDispersionPmeGrid;
if (pmeBsplineModuliX != NULL)
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
if (pmeBsplineModuliY != NULL)
...
@@ -1617,20 +1611,14 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1617,20 +1611,14 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete pmeAtomRange;
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
delete pmeAtomGridIndex;
if (pmeAtomDispersionGridIndex != NULL)
delete pmeAtomDispersionGridIndex;
if (pmeEnergyBuffer != NULL)
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
delete pmeEnergyBuffer;
if (dispersionPmeEnergyBuffer != NULL)
delete dispersionPmeEnergyBuffer;
if (sort != NULL)
if (sort != NULL)
delete sort;
delete sort;
if (fft != NULL)
if (fft != NULL)
delete fft;
delete fft;
if (pmeio != NULL)
if (pmeio != NULL)
delete pmeio;
delete pmeio;
if (dispersionPmeio != NULL)
delete dispersionPmeio;
if (hasInitializedFFT) {
if (hasInitializedFFT) {
if (useCudaFFT) {
if (useCudaFFT) {
cufftDestroy(fftForward);
cufftDestroy(fftForward);
...
@@ -1639,10 +1627,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1639,10 +1627,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
if (usePmeStream) {
if (usePmeStream) {
cuStreamDestroy(pmeStream);
cuStreamDestroy(pmeStream);
cuEventDestroy(pmeSyncEvent);
cuEventDestroy(pmeSyncEvent);
if(doLJPME){
cuStreamDestroy(dispersionPmeStream);
cuEventDestroy(dispersionPmeSyncEvent);
}
}
}
}
}
}
}
...
@@ -1668,25 +1652,12 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1668,25 +1652,12 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
int numParticles = force.getNumParticles();
// Pack the C6 coeffiecient in with sigma and epsilon, in case LJPME is being used. The C6
doLJPME = (nonbondedMethod == LJPME);
// coefficients could live in a separate array, but that would hurt cache efficiency for LJPME.
doLJPME = nonbondedMethod == LJPME;
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if(doLJPME){
if (cu.getUseDoublePrecision())
C6s = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "C6s");
else
C6s = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "C6s");
}
CudaArray& posq = cu.getPosq();
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
float4* posqf = (float4*) &temp[0];
double4* posqd = (double4*) &temp[0];
double4* posqd = (double4*) &temp[0];
// The C6 coefficients for LJPME could be computed from sigma and epsilon, but it
// seems like a good idea to cache them and avoid many recomputations.
vector<double> tmpc6(posq.getSize());
float* c6f = (float*) &tmpc6[0];
double* c6d = (double*) &tmpc6[0];
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<vector<int> > exclusionList(numParticles);
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
double sumSquaredCharges = 0.0;
...
@@ -1703,16 +1674,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1703,16 +1674,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
double sig = (float) (0.5*sigma);
double sig = (float) (0.5*sigma);
double eps = (float) (2.0*sqrt(epsilon));
double eps = (float) (2.0*sqrt(epsilon));
sigmaEpsilonVector[i] = make_float2(sig, eps);
sigmaEpsilonVector[i] = make_float2(sig, eps);
if(doLJPME){
float C6 = (float) 8.0*pow(sig, 3) * eps;
sumSquaredC6 += C6*C6;
if (cu.getUseDoublePrecision())
c6d[i] = 8.0*pow(sig,3)*eps;
else
c6f[i] = 8.0f*pow((float)sig,3)*eps;
}
exclusionList[i].push_back(i);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
if (charge != 0.0)
hasCoulomb = true;
hasCoulomb = true;
if (epsilon != 0.0)
if (epsilon != 0.0)
...
@@ -1724,8 +1689,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1724,8 +1689,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
posq.upload(&temp[0]);
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
sigmaEpsilon->upload(sigmaEpsilonVector);
if(doLJPME)
C6s->upload(&tmpc6[0]);
bool useCutoff = (nonbondedMethod != NoCutoff);
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
map<string, string> defines;
map<string, string> defines;
...
@@ -1755,7 +1718,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1755,7 +1718,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
dispersionCoefficient = 0.0;
dispersionCoefficient = 0.0;
alpha = 0;
alpha = 0;
ewaldSelfEnergy = 0.0;
ewaldSelfEnergy = 0.0;
dispersionSelfEnergy = 0.0;
if (nonbondedMethod == Ewald) {
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
// Compute the Ewald parameters.
...
@@ -1792,7 +1754,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1792,7 +1754,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
if(doLJPME){
if
(doLJPME)
{
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = CudaFFT3D::findLegalDimension(dispersionGridSizeX);
dispersionGridSizeX = CudaFFT3D::findLegalDimension(dispersionGridSizeX);
...
@@ -1801,7 +1763,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1801,7 +1763,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
if(doLJPME) {
if
(doLJPME) {
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dalphaR = dispersionAlpha * force.getCutoffDistance();
...
@@ -1816,7 +1778,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1816,7 +1778,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["DO_LJPME"] = doLJPME ? "1" : "0";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (cu.getContextIndex() == 0) {
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
if(doLJPME) dispersionSelfEnergy = pow(dispersionAlpha, 6) * sumSquaredC6 / 12.0;
if (doLJPME)
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
char deviceName[100];
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
cuDeviceGetName(deviceName, 100, cu.getDevice());
...
@@ -1836,7 +1799,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1836,7 +1799,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines["USE_PME_STREAM"] = "1";
pmeDefines["USE_PME_STREAM"] = "1";
if (cu.getPlatformData().deterministicForces)
if (cu.getPlatformData().deterministicForces)
pmeDefines["USE_DETERMINISTIC_FORCES"] = "1";
pmeDefines["USE_DETERMINISTIC_FORCES"] = "1";
if (doLJPME){
if (doLJPME)
{
pmeDefines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
pmeDefines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
pmeDefines["DISPERSION_GRID_SIZE_X"] = cu.intToString(dispersionGridSizeX);
pmeDefines["DISPERSION_GRID_SIZE_X"] = cu.intToString(dispersionGridSizeX);
pmeDefines["DISPERSION_GRID_SIZE_Y"] = cu.intToString(dispersionGridSizeY);
pmeDefines["DISPERSION_GRID_SIZE_Y"] = cu.intToString(dispersionGridSizeY);
...
@@ -1844,11 +1807,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1844,11 +1807,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines["RECIP_DISPERSION_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["RECIP_DISPERSION_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
}
}
CUmodule module;
CUmodule module;
if(doLJPME)
{
if
(doLJPME)
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme+CudaKernelSources::ljpme, pmeDefines);
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme+CudaKernelSources::ljpme, pmeDefines);
}
else
{
else
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
}
if (cu.getPlatformData().useCpuPme) {
if (cu.getPlatformData().useCpuPme) {
// Create the CPU PME kernel.
// Create the CPU PME kernel.
...
@@ -1859,7 +1821,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1859,7 +1821,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeio = new PmeIO(cu, addForcesKernel);
pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
if(doLJPME){
if
(doLJPME)
{
cpuDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
cpuDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
cpuDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSizeX, dispersionGridSizeY,
cpuDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, numParticles, dispersionAlpha);
dispersionGridSizeZ, numParticles, dispersionAlpha);
...
@@ -1880,7 +1842,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1880,7 +1842,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
if(doLJPME){
if
(doLJPME)
{
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadC6");
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadC6");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomDispersionGridIndex");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomDispersionGridIndex");
pmeDispersionSpreadChargeKernel = cu.getKernel(module, "gridSpreadC6");
pmeDispersionSpreadChargeKernel = cu.getKernel(module, "gridSpreadC6");
...
@@ -1893,31 +1855,20 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1893,31 +1855,20 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Create required data structures.
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
directPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
reciprocalPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "reciprocalPmeGrid");
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
directPmeGrid = new CudaArray(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(*directPmeGrid);
cu.addAutoclearBuffer(*directPmeGrid);
if(doLJPME){
directDispersionPmeGrid = new CudaArray(cu, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ,
cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalDispersionPmeGrid");
cu.addAutoclearBuffer(*directDispersionPmeGrid);
reciprocalDispersionPmeGrid = new CudaArray(cu, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ,
2*elementSize, "reciprocalDispersionPmeGrid");
}
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
if(doLJPME)
pmeAtomDispersionGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomDispersionGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
pmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(*pmeEnergyBuffer);
cu.clearBuffer(*pmeEnergyBuffer);
if(doLJPME){
dispersionPmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize,
"dispersionPmeEnergyBuffer");
cu.clearBuffer(*dispersionPmeEnergyBuffer);
}
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion;
int cufftVersion;
cufftGetVersion(&cufftVersion);
cufftGetVersion(&cufftVersion);
...
@@ -1929,7 +1880,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1929,7 +1880,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
if(doLJPME){
if
(doLJPME)
{
result = cufftPlan3d(&dispersionFftForward, dispersionGridSizeX, dispersionGridSizeY,
result = cufftPlan3d(&dispersionFftForward, dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
dispersionGridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
if (result != CUFFT_SUCCESS)
...
@@ -1959,19 +1910,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1959,19 +1910,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
recipForceGroup = force.getForceGroup();
recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
if(doLJPME){
cuStreamCreate(&dispersionPmeStream, CU_STREAM_NON_BLOCKING);
// Dispersion terms use yet another stream.
if (useCudaFFT) {
cufftSetStream(dispersionFftForward, dispersionPmeStream);
cufftSetStream(dispersionFftBackward, dispersionPmeStream);
}
CHECK_RESULT(cuEventCreate(&dispersionPmeSyncEvent, CU_EVENT_DISABLE_TIMING),
"Error creating event for Dispersion term of NonbondedForce");
// The force group is the same as the electrostatic PME reciprocal force group.
cu.addPreComputation(new SyncStreamPreComputation(cu, dispersionPmeStream, dispersionPmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, dispersionPmeSyncEvent, cu.getKernel(module, "addEnergy"), *dispersionPmeEnergyBuffer, recipForceGroup));
}
}
}
hasInitializedFFT = true;
hasInitializedFFT = true;
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
@@ -2051,18 +1989,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -2051,18 +1989,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
{
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
sizeof(float2), sigmaEpsilon->getDevicePointer()));
if(doLJPME){
if (cu.getUseDoublePrecision())
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("C6s", "double", 1,
sizeof(double), C6s->getDevicePointer()));
else
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("C6s", "float", 1,
sizeof(float), C6s->getDevicePointer()));
}
}
// Initialize the exceptions.
// Initialize the exceptions.
...
@@ -2142,7 +2071,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2142,7 +2071,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs,
directPmeGrid->get
Size
()
, 256);
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs,
gridSizeX*gridSizeY*grid
Size
Z
, 256);
}
}
if (useCudaFFT) {
if (useCudaFFT) {
...
@@ -2181,84 +2110,74 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2181,84 +2110,74 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
if (usePmeStream) {
cuEventRecord(pmeSyncEvent, pmeStream);
cu.restoreDefaultStream();
}
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
// now we assume that LJPME can only be used if electrostatic PME is also active.
if(doLJPME){
if (doLJPME) {
if (usePmeStream)
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.setCurrentStream(dispersionPmeStream);
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomDispersionGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtom
Dispersion
GridIndex);
sort->sort(*pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directDispersionPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.clearBuffer(*directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtom
Dispersion
GridIndex->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&
C6s
->getDevicePointer()};
&
sigmaEpsilon
->getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&direct
Dispersion
PmeGrid->getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, di
rectDispersionPmeGrid->get
Size
()
, 256);
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, di
spersionGridSizeX*dispersionGridSizeY*dispersionGrid
Size
Z
, 256);
}
}
if (useCudaFFT) {
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) direct
Dispersion
PmeGrid->getDevicePointer(), (double2*) reciprocal
Dispersion
PmeGrid->getDevicePointer());
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer());
else
else
cufftExecR2C(dispersionFftForward, (float*) direct
Dispersion
PmeGrid->getDevicePointer(), (float2*) reciprocal
Dispersion
PmeGrid->getDevicePointer());
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer());
}
}
else {
else {
fft->execFFT(*direct
Dispersion
PmeGrid, *reciprocal
Dispersion
PmeGrid, true);
fft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true);
}
}
if (includeEnergy) {
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocal
Dispersion
PmeGrid->getDevicePointer(), usePmeStream ? &
dispersionP
meEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &
p
meEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2],
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
&C6s->getDevicePointer()};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
}
void* convolutionArgs[] = {&reciprocal
Dispersion
PmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocal
Dispersion
PmeGrid->getDevicePointer(), (double*) direct
Dispersion
PmeGrid->getDevicePointer());
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer());
else
else
cufftExecC2R(dispersionFftBackward, (float2*) reciprocal
Dispersion
PmeGrid->getDevicePointer(), (float*) direct
Dispersion
PmeGrid->getDevicePointer());
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*) directPmeGrid->getDevicePointer());
}
}
else {
else {
fft->execFFT(*reciprocal
Dispersion
PmeGrid, *direct
Dispersion
PmeGrid, false);
fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
}
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &direct
Dispersion
PmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtom
Dispersion
GridIndex->getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&
C6s
->getDevicePointer()};
&
sigmaEpsilon
->getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (usePmeStream) {
if (usePmeStream) {
cuEventRecord(
dispersionP
meSyncEvent,
dispersionP
meStream);
cuEventRecord(
p
meSyncEvent,
p
meStream);
cu.restoreDefaultStream();
cu.restoreDefaultStream();
}
}
}
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if(includeReciprocal) energy += dispersionSelfEnergy;
if (dispersionCoefficient != 0.0 && includeDirect) {
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
...
@@ -2304,7 +2223,6 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2304,7 +2223,6 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
float4* posqf = (float4*) cu.getPinnedBuffer();
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<float3> sigmaEpsilonC6Vector(cu.getPaddedNumAtoms(), make_float3(0, 0, 0));
double sumSquaredCharges = 0.0;
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
double sumSquaredC6 = 0.0;
const vector<int>& order = cu.getAtomIndex();
const vector<int>& order = cu.getAtomIndex();
...
@@ -2318,21 +2236,13 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2318,21 +2236,13 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
posqf[i].w = (float) charge;
posqf[i].w = (float) charge;
float sig = (float) (0.5*sigma);
float sig = (float) (0.5*sigma);
float eps = (float) (2.0*sqrt(epsilon));
float eps = (float) (2.0*sqrt(epsilon));
if(doLJPME){
float C6 = (float) 8.0*pow(sig, 3) * eps;
sumSquaredC6 += C6*C6;
sigmaEpsilonC6Vector[index] = make_float3(sig, eps, C6);
}else{
sigmaEpsilonVector[index] = make_float2(sig, eps);
sigmaEpsilonVector[index] = make_float2(sig, eps);
}
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
sumSquaredCharges += charge*charge;
sumSquaredCharges += charge*charge;
}
}
posq.upload(cu.getPinnedBuffer());
posq.upload(cu.getPinnedBuffer());
if(doLJPME){
sigmaEpsilon->upload(sigmaEpsilonC6Vector);
}else{
sigmaEpsilon->upload(sigmaEpsilonVector);
sigmaEpsilon->upload(sigmaEpsilonVector);
}
// Record the exceptions.
// Record the exceptions.
...
@@ -2349,10 +2259,10 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2349,10 +2259,10 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Compute other values.
// Compute other values.
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
if (nonbondedMethod == Ewald || nonbondedMethod == PME
|| nonbondedMethod == LJPME
)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (nonbondedMethod == LJPME)
if (nonbondedMethod == LJPME)
dispersion
SelfEnergy = (cu.getContextIndex() == 0 ?
pow(dispersionAlpha, 6)
*
sumSquaredC6
/
12.0 : 0);
ewald
SelfEnergy
+
= (cu.getContextIndex() == 0 ? pow(dispersionAlpha, 6)
*
sumSquaredC6
/
12.0 : 0);
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
cu.invalidateMolecules();
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
dbcc494f
...
@@ -30,7 +30,8 @@
...
@@ -30,7 +30,8 @@
const
real
dar6
=
dar4
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
real
expDar2
=
EXP
(
-
dar2
);
const
real
c6
=
C6s1
*
C6s2
;
const
real2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
...
...
platforms/cuda/src/kernels/ljpme.cu
View file @
dbcc494f
...
@@ -22,7 +22,7 @@ extern "C" __global__ void findAtomDispersionGridIndex(const real4* __restrict__
...
@@ -22,7 +22,7 @@ extern "C" __global__ void findAtomDispersionGridIndex(const real4* __restrict__
extern
"C"
__global__
void
gridSpreadC6
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
extern
"C"
__global__
void
gridSpreadC6
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
const
real
2
*
__restrict__
sigmaEpsilon
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -64,6 +64,8 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
...
@@ -64,6 +64,8 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
// Spread the charge from this atom onto each grid point.
// Spread the charge from this atom onto each grid point.
const
real2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
c6
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
...
@@ -81,8 +83,7 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
...
@@ -81,8 +83,7 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
zindex
-=
(
zindex
>=
DISPERSION_GRID_SIZE_Z
?
DISPERSION_GRID_SIZE_Z
:
0
);
zindex
-=
(
zindex
>=
DISPERSION_GRID_SIZE_Z
?
DISPERSION_GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
int
index
=
ybase
+
zindex
;
// We need to grab the C6 coefficient from the array
real
add
=
c6
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
C6s
[
atom
]
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
@@ -241,7 +242,7 @@ gridEvaluateDispersionEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __r
...
@@ -241,7 +242,7 @@ gridEvaluateDispersionEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __r
extern
"C"
__global__
extern
"C"
__global__
void
gridInterpolateDispersionForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
void
gridInterpolateDispersionForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
2
*
__restrict__
sigmaEpsilon
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -313,7 +314,8 @@ void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned lon
...
@@ -313,7 +314,8 @@ void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned lon
}
}
}
}
}
}
real
q
=
C6s
[
atom
];
const
real2
sigEps
=
sigmaEpsilon
[
atom
];
real
q
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
real
forceX
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceX
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceY
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
DISPERSION_GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
real
forceZ
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
DISPERSION_GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment