Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
dbcc494f
"vscode:/vscode.git/clone" did not exist on "8eeee16de9bc772321ae2b87672700b076913b56"
Commit
dbcc494f
authored
Jan 24, 2017
by
Peter Eastman
Browse files
Cleanup to CUDA dispersion PME code
parent
a9f65649
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
75 additions
and
170 deletions
+75
-170
openmmapi/include/openmm/internal/NonbondedForceImpl.h
openmmapi/include/openmm/internal/NonbondedForceImpl.h
+1
-1
openmmapi/src/NonbondedForceImpl.cpp
openmmapi/src/NonbondedForceImpl.cpp
+3
-4
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+6
-13
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+54
-144
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+2
-1
platforms/cuda/src/kernels/ljpme.cu
platforms/cuda/src/kernels/ljpme.cu
+9
-7
No files found.
openmmapi/include/openmm/internal/NonbondedForceImpl.h
View file @
dbcc494f
...
...
@@ -75,7 +75,7 @@ public:
* This is a utility routine that calculates the values to use for alpha and grid size when using
* Particle Mesh Ewald.
*/
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
);
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
lj
);
/**
* Compute the coefficient which, when divided by the periodic box volume, gives the
* long range dispersion correction to the energy.
...
...
openmmapi/src/NonbondedForceImpl.cpp
View file @
dbcc494f
...
...
@@ -151,12 +151,11 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
kmaxz
++
;
}
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
)
{
if
(
LJ
)
{
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
lj
)
{
if
(
lj
)
force
.
getLJPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
else
{
else
force
.
getPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
if
(
alpha
==
0.0
)
{
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
...
...
platforms/cuda/include/CudaKernels.h
View file @
dbcc494f
...
...
@@ -598,10 +598,9 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
C6s
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
directDispersionPmeGrid
(
NULL
),
reciprocalDispersionPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeAtomDispersionGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
dispersionPmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
),
dispersionPmeio
(
NULL
)
{
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
}
~
CudaCalcNonbondedForceKernel
();
/**
...
...
@@ -666,28 +665,22 @@ private:
CudaContext
&
cu
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
C6s
;
CudaArray
*
exceptionParams
;
CudaArray
*
cosSinSums
;
CudaArray
*
directPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
directDispersionPmeGrid
;
CudaArray
*
reciprocalDispersionPmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomDispersionGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
dispersionPmeEnergyBuffer
;
CudaSort
*
sort
;
Kernel
cpuPme
;
Kernel
cpuDispersionPme
;
PmeIO
*
pmeio
;
PmeIO
*
dispersionPmeio
;
CUstream
pmeStream
,
dispersionPmeStream
;
CUevent
pmeSyncEvent
,
dispersionPmeSyncEvent
;
CUstream
pmeStream
;
CUevent
pmeSyncEvent
;
CudaFFT3D
*
fft
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
...
...
@@ -710,7 +703,7 @@ private:
CUfunction
pmeInterpolateDispersionForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
dbcc494f
...
...
@@ -1593,20 +1593,14 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu.setAsCurrent();
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (C6s != NULL)
delete C6s;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (directPmeGrid != NULL)
delete directPmeGrid;
if (directDispersionPmeGrid != NULL)
delete directDispersionPmeGrid;
if (reciprocalPmeGrid != NULL)
delete reciprocalPmeGrid;
if (reciprocalDispersionPmeGrid != NULL)
delete reciprocalDispersionPmeGrid;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
...
...
@@ -1617,20 +1611,14 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeAtomDispersionGridIndex != NULL)
delete pmeAtomDispersionGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (dispersionPmeEnergyBuffer != NULL)
delete dispersionPmeEnergyBuffer;
if (sort != NULL)
delete sort;
if (fft != NULL)
delete fft;
if (pmeio != NULL)
delete pmeio;
if (dispersionPmeio != NULL)
delete dispersionPmeio;
if (hasInitializedFFT) {
if (useCudaFFT) {
cufftDestroy(fftForward);
...
...
@@ -1639,10 +1627,6 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
if (usePmeStream) {
cuStreamDestroy(pmeStream);
cuEventDestroy(pmeSyncEvent);
if(doLJPME){
cuStreamDestroy(dispersionPmeStream);
cuEventDestroy(dispersionPmeSyncEvent);
}
}
}
}
...
...
@@ -1668,25 +1652,12 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
// Pack the C6 coeffiecient in with sigma and epsilon, in case LJPME is being used. The C6
// coefficients could live in a separate array, but that would hurt cache efficiency for LJPME.
doLJPME = nonbondedMethod == LJPME;
doLJPME = (nonbondedMethod == LJPME);
sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if(doLJPME){
if (cu.getUseDoublePrecision())
C6s = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "C6s");
else
C6s = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "C6s");
}
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
double4* posqd = (double4*) &temp[0];
// The C6 coefficients for LJPME could be computed from sigma and epsilon, but it
// seems like a good idea to cache them and avoid many recomputations.
vector<double> tmpc6(posq.getSize());
float* c6f = (float*) &tmpc6[0];
double* c6d = (double*) &tmpc6[0];
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
...
...
@@ -1703,16 +1674,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
double sig = (float) (0.5*sigma);
double eps = (float) (2.0*sqrt(epsilon));
sigmaEpsilonVector[i] = make_float2(sig, eps);
if(doLJPME){
float C6 = (float) 8.0*pow(sig, 3) * eps;
sumSquaredC6 += C6*C6;
if (cu.getUseDoublePrecision())
c6d[i] = 8.0*pow(sig,3)*eps;
else
c6f[i] = 8.0f*pow((float)sig,3)*eps;
}
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
...
...
@@ -1724,8 +1689,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
if(doLJPME)
C6s->upload(&tmpc6[0]);
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
map<string, string> defines;
...
...
@@ -1755,7 +1718,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
dispersionSelfEnergy = 0.0;
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
...
...
@@ -1792,7 +1754,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
if(doLJPME){
if
(doLJPME)
{
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = CudaFFT3D::findLegalDimension(dispersionGridSizeX);
...
...
@@ -1801,7 +1763,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
if(doLJPME) {
if
(doLJPME) {
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
...
...
@@ -1816,7 +1778,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
if(doLJPME) dispersionSelfEnergy = pow(dispersionAlpha, 6) * sumSquaredC6 / 12.0;
if (doLJPME)
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
...
...
@@ -1836,7 +1799,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines["USE_PME_STREAM"] = "1";
if (cu.getPlatformData().deterministicForces)
pmeDefines["USE_DETERMINISTIC_FORCES"] = "1";
if (doLJPME){
if (doLJPME)
{
pmeDefines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
pmeDefines["DISPERSION_GRID_SIZE_X"] = cu.intToString(dispersionGridSizeX);
pmeDefines["DISPERSION_GRID_SIZE_Y"] = cu.intToString(dispersionGridSizeY);
...
...
@@ -1844,11 +1807,10 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines["RECIP_DISPERSION_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
}
CUmodule module;
if(doLJPME)
{
if
(doLJPME)
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme+CudaKernelSources::ljpme, pmeDefines);
}
else
{
else
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
}
if (cu.getPlatformData().useCpuPme) {
// Create the CPU PME kernel.
...
...
@@ -1859,7 +1821,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
if(doLJPME){
if
(doLJPME)
{
cpuDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
cpuDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, numParticles, dispersionAlpha);
...
...
@@ -1880,7 +1842,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
if(doLJPME){
if
(doLJPME)
{
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadC6");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomDispersionGridIndex");
pmeDispersionSpreadChargeKernel = cu.getKernel(module, "gridSpreadC6");
...
...
@@ -1893,31 +1855,20 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
directPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "reciprocalPmeGrid");
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
directPmeGrid = new CudaArray(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(*directPmeGrid);
if(doLJPME){
directDispersionPmeGrid = new CudaArray(cu, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ,
cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalDispersionPmeGrid");
cu.addAutoclearBuffer(*directDispersionPmeGrid);
reciprocalDispersionPmeGrid = new CudaArray(cu, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ,
2*elementSize, "reciprocalDispersionPmeGrid");
}
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
if(doLJPME)
pmeAtomDispersionGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomDispersionGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(*pmeEnergyBuffer);
if(doLJPME){
dispersionPmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize,
"dispersionPmeEnergyBuffer");
cu.clearBuffer(*dispersionPmeEnergyBuffer);
}
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion;
cufftGetVersion(&cufftVersion);
...
...
@@ -1929,7 +1880,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
if(doLJPME){
if
(doLJPME)
{
result = cufftPlan3d(&dispersionFftForward, dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
...
...
@@ -1959,19 +1910,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
if(doLJPME){
cuStreamCreate(&dispersionPmeStream, CU_STREAM_NON_BLOCKING);
// Dispersion terms use yet another stream.
if (useCudaFFT) {
cufftSetStream(dispersionFftForward, dispersionPmeStream);
cufftSetStream(dispersionFftBackward, dispersionPmeStream);
}
CHECK_RESULT(cuEventCreate(&dispersionPmeSyncEvent, CU_EVENT_DISABLE_TIMING),
"Error creating event for Dispersion term of NonbondedForce");
// The force group is the same as the electrostatic PME reciprocal force group.
cu.addPreComputation(new SyncStreamPreComputation(cu, dispersionPmeStream, dispersionPmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, dispersionPmeSyncEvent, cu.getKernel(module, "addEnergy"), *dispersionPmeEnergyBuffer, recipForceGroup));
}
}
hasInitializedFFT = true;
// Initialize the b-spline moduli.
...
...
@@ -2051,18 +1989,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
{
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
if(doLJPME){
if (cu.getUseDoublePrecision())
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("C6s", "double", 1,
sizeof(double), C6s->getDevicePointer()));
else
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("C6s", "float", 1,
sizeof(float), C6s->getDevicePointer()));
}
}
// Initialize the exceptions.
...
...
@@ -2142,7 +2071,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs,
directPmeGrid->get
Size
()
, 256);
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs,
gridSizeX*gridSizeY*grid
Size
Z
, 256);
}
if (useCudaFFT) {
...
...
@@ -2181,84 +2110,74 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
if (usePmeStream) {
cuEventRecord(pmeSyncEvent, pmeStream);
cu.restoreDefaultStream();
}
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
if(doLJPME){
if (usePmeStream)
cu.setCurrentStream(dispersionPmeStream);
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomDispersionGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
if (doLJPME) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtom
Dispersion
GridIndex);
sort->sort(*pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directDispersionPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.clearBuffer(*directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtom
Dispersion
GridIndex->getDevicePointer(),
&
C6s
->getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&
sigmaEpsilon
->getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&direct
Dispersion
PmeGrid->getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, di
rectDispersionPmeGrid->get
Size
()
, 256);
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, di
spersionGridSizeX*dispersionGridSizeY*dispersionGrid
Size
Z
, 256);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) direct
Dispersion
PmeGrid->getDevicePointer(), (double2*) reciprocal
Dispersion
PmeGrid->getDevicePointer());
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer());
else
cufftExecR2C(dispersionFftForward, (float*) direct
Dispersion
PmeGrid->getDevicePointer(), (float2*) reciprocal
Dispersion
PmeGrid->getDevicePointer());
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer());
}
else {
fft->execFFT(*direct
Dispersion
PmeGrid, *reciprocal
Dispersion
PmeGrid, true);
fft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocal
Dispersion
PmeGrid->getDevicePointer(), usePmeStream ? &
dispersionP
meEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &
p
meEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2],
&C6s->getDevicePointer()};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
void* convolutionArgs[] = {&reciprocal
Dispersion
PmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocal
Dispersion
PmeGrid->getDevicePointer(), (double*) direct
Dispersion
PmeGrid->getDevicePointer());
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer());
else
cufftExecC2R(dispersionFftBackward, (float2*) reciprocal
Dispersion
PmeGrid->getDevicePointer(), (float*) direct
Dispersion
PmeGrid->getDevicePointer());
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*) directPmeGrid->getDevicePointer());
}
else {
fft->execFFT(*reciprocal
Dispersion
PmeGrid, *direct
Dispersion
PmeGrid, false);
fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &direct
Dispersion
PmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtom
Dispersion
GridIndex->getDevicePointer(),
&
C6s
->getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&
sigmaEpsilon
->getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (usePmeStream) {
cuEventRecord(
dispersionP
meSyncEvent,
dispersionP
meStream);
cuEventRecord(
p
meSyncEvent,
p
meStream);
cu.restoreDefaultStream();
}
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if(includeReciprocal) energy += dispersionSelfEnergy;
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
...
...
@@ -2304,7 +2223,6 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<float3> sigmaEpsilonC6Vector(cu.getPaddedNumAtoms(), make_float3(0, 0, 0));
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
const vector<int>& order = cu.getAtomIndex();
...
...
@@ -2318,21 +2236,13 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
posqf[i].w = (float) charge;
float sig = (float) (0.5*sigma);
float eps = (float) (2.0*sqrt(epsilon));
if(doLJPME){
float C6 = (float) 8.0*pow(sig, 3) * eps;
sumSquaredC6 += C6*C6;
sigmaEpsilonC6Vector[index] = make_float3(sig, eps, C6);
}else{
sigmaEpsilonVector[index] = make_float2(sig, eps);
}
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
sumSquaredCharges += charge*charge;
}
posq.upload(cu.getPinnedBuffer());
if(doLJPME){
sigmaEpsilon->upload(sigmaEpsilonC6Vector);
}else{
sigmaEpsilon->upload(sigmaEpsilonVector);
}
// Record the exceptions.
...
...
@@ -2349,10 +2259,10 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Compute other values.
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
if (nonbondedMethod == Ewald || nonbondedMethod == PME
|| nonbondedMethod == LJPME
)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (nonbondedMethod == LJPME)
dispersion
SelfEnergy = (cu.getContextIndex() == 0 ?
pow(dispersionAlpha, 6)
*
sumSquaredC6
/
12.0 : 0);
ewald
SelfEnergy
+
= (cu.getContextIndex() == 0 ? pow(dispersionAlpha, 6)
*
sumSquaredC6
/
12.0 : 0);
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
dbcc494f
...
...
@@ -30,7 +30,8 @@
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
real
c6
=
C6s1
*
C6s2
;
const
real2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
...
...
platforms/cuda/src/kernels/ljpme.cu
View file @
dbcc494f
...
...
@@ -22,7 +22,7 @@ extern "C" __global__ void findAtomDispersionGridIndex(const real4* __restrict__
extern
"C"
__global__
void
gridSpreadC6
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
const
real
2
*
__restrict__
sigmaEpsilon
)
{
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
...
@@ -64,6 +64,8 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
// Spread the charge from this atom onto each grid point.
const
real2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
c6
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
...
...
@@ -81,8 +83,7 @@ extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __
zindex
-=
(
zindex
>=
DISPERSION_GRID_SIZE_Z
?
DISPERSION_GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
// We need to grab the C6 coefficient from the array
real
add
=
C6s
[
atom
]
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
c6
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
...
@@ -241,7 +242,7 @@ gridEvaluateDispersionEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __r
extern
"C"
__global__
void
gridInterpolateDispersionForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
2
*
__restrict__
sigmaEpsilon
)
{
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
...
@@ -313,7 +314,8 @@ void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned lon
}
}
}
real
q
=
C6s
[
atom
];
const
real2
sigEps
=
sigmaEpsilon
[
atom
];
real
q
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
real
forceX
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
DISPERSION_GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment