Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a9f65649
Commit
a9f65649
authored
Jan 23, 2017
by
Peter Eastman
Browse files
Merge branch 'dpme' of
https://github.com/andysim/openmm
into ljpme
parents
9567ddb3
58b6e3b6
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1236 additions
and
125 deletions
+1236
-125
olla/include/openmm/kernels.h
olla/include/openmm/kernels.h
+63
-2
openmmapi/include/openmm/NonbondedForce.h
openmmapi/include/openmm/NonbondedForce.h
+41
-3
openmmapi/include/openmm/internal/NonbondedForceImpl.h
openmmapi/include/openmm/internal/NonbondedForceImpl.h
+2
-1
openmmapi/src/NonbondedForce.cpp
openmmapi/src/NonbondedForce.cpp
+18
-0
openmmapi/src/NonbondedForceImpl.cpp
openmmapi/src/NonbondedForceImpl.cpp
+10
-2
platforms/cpu/include/CpuKernels.h
platforms/cpu/include/CpuKernels.h
+15
-5
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+49
-14
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+15
-1
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+15
-0
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+60
-9
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+163
-51
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+44
-5
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+57
-3
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+34
-6
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+9
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+264
-22
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+4
-0
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+4
-0
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+44
-1
platforms/cuda/src/kernels/ljpme.cu
platforms/cuda/src/kernels/ljpme.cu
+325
-0
No files found.
olla/include/openmm/kernels.h
View file @
a9f65649
...
@@ -555,7 +555,8 @@ public:
...
@@ -555,7 +555,8 @@ public:
CutoffNonPeriodic
=
1
,
CutoffNonPeriodic
=
1
,
CutoffPeriodic
=
2
,
CutoffPeriodic
=
2
,
Ewald
=
3
,
Ewald
=
3
,
PME
=
4
PME
=
4
,
LJPME
=
5
};
};
static
std
::
string
Name
()
{
static
std
::
string
Name
()
{
return
"CalcNonbondedForce"
;
return
"CalcNonbondedForce"
;
...
@@ -589,13 +590,22 @@ public:
...
@@ -589,13 +590,22 @@ public:
virtual
void
copyParametersToContext
(
ContextImpl
&
context
,
const
NonbondedForce
&
force
)
=
0
;
virtual
void
copyParametersToContext
(
ContextImpl
&
context
,
const
NonbondedForce
&
force
)
=
0
;
/**
/**
* Get the parameters being used for PME.
* Get the parameters being used for PME.
*
*
* @param alpha the separation parameter
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
virtual
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
=
0
;
virtual
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
=
0
;
/**
* Get the parameters being used for the dispersion terms in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
virtual
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
=
0
;
};
};
/**
/**
...
@@ -1335,6 +1345,57 @@ public:
...
@@ -1335,6 +1345,57 @@ public:
};
};
/**
* This kernel performs the dispersion reciprocal space calculation for LJPME. In most cases, this
* calculation is done directly by CalcNonbondedForceKernel so this kernel is unneeded.
* In some cases it may want to outsource the work to a different kernel. In particular,
* GPU based platforms sometimes use a CPU based implementation provided by a separate
* plugin.
*/
class
CalcDispersionPmeReciprocalForceKernel
:
public
KernelImpl
{
public:
class
IO
;
static
std
::
string
Name
()
{
return
"CalcDispersionPmeReciprocalForce"
;
}
CalcDispersionPmeReciprocalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
)
:
KernelImpl
(
name
,
platform
)
{
}
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
virtual
void
initialize
(
int
gridx
,
int
gridy
,
int
gridz
,
int
numParticles
,
double
alpha
)
=
0
;
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxVectors the vectors defining the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
virtual
void
beginComputation
(
IO
&
io
,
const
Vec3
*
periodicBoxVectors
,
bool
includeEnergy
)
=
0
;
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
virtual
double
finishComputation
(
IO
&
io
)
=
0
;
/**
* Get the parameters being used for PME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
virtual
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
=
0
;
};
}
// namespace OpenMM
}
// namespace OpenMM
#endif
/*OPENMM_KERNELS_H_*/
#endif
/*OPENMM_KERNELS_H_*/
openmmapi/include/openmm/NonbondedForce.h
View file @
a9f65649
...
@@ -109,7 +109,12 @@ public:
...
@@ -109,7 +109,12 @@ public:
* Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle
* Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle
* with all periodic copies of every other particle.
* with all periodic copies of every other particle.
*/
*/
PME
=
4
PME
=
4
,
/**
* Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle
* with all periodic copies of every other particle for both electrostatics and dispersion. No switching is used for either interaction.
*/
LJPME
=
5
};
};
/**
/**
* Create a NonbondedForce.
* Create a NonbondedForce.
...
@@ -207,6 +212,16 @@ public:
...
@@ -207,6 +212,16 @@ public:
* @param[out] nz the number of grid points along the Z axis
* @param[out] nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the parameters to use for dispersion term in LJ-PME calculations. If alpha is 0 (the default),
* these parameters are ignored and instead their values are chosen based on the Ewald error tolerance.
*
* @param[out] alpha the separation parameter
* @param[out] nx the number of dispersion grid points along the X axis
* @param[out] ny the number of dispersion grid points along the Y axis
* @param[out] nz the number of dispersion grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
/**
* Set the parameters to use for PME calculations. If alpha is 0 (the default), these parameters are
* Set the parameters to use for PME calculations. If alpha is 0 (the default), these parameters are
* ignored and instead their values are chosen based on the Ewald error tolerance.
* ignored and instead their values are chosen based on the Ewald error tolerance.
...
@@ -217,6 +232,16 @@ public:
...
@@ -217,6 +232,16 @@ public:
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
setPMEParameters
(
double
alpha
,
int
nx
,
int
ny
,
int
nz
);
void
setPMEParameters
(
double
alpha
,
int
nx
,
int
ny
,
int
nz
);
/**
* Set the parameters to use for the dispersion term in LJPME calculations. If alpha is 0 (the default),
* these parameters are ignored and instead their values are chosen based on the Ewald error tolerance.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
setLJPMEParameters
(
double
alpha
,
int
nx
,
int
ny
,
int
nz
);
/**
/**
* Get the parameters being used for PME in a particular Context. Because some platforms have restrictions
* Get the parameters being used for PME in a particular Context. Because some platforms have restrictions
* on the allowed grid sizes, the values that are actually used may be slightly different from those
* on the allowed grid sizes, the values that are actually used may be slightly different from those
...
@@ -230,6 +255,19 @@ public:
...
@@ -230,6 +255,19 @@ public:
* @param[out] nz the number of grid points along the Z axis
* @param[out] nz the number of grid points along the Z axis
*/
*/
void
getPMEParametersInContext
(
const
Context
&
context
,
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParametersInContext
(
const
Context
&
context
,
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the PME parameters being used for the dispersion term for LJPME in a particular Context. Because some
* platforms have restrictions on the allowed grid sizes, the values that are actually used may be slightly different
* from those specified with setPMEParameters(), or the standard values calculated based on the Ewald error tolerance.
* See the manual for details.
*
* @param context the Context for which to get the parameters
* @param[out] alpha the separation parameter
* @param[out] nx the number of grid points along the X axis
* @param[out] ny the number of grid points along the Y axis
* @param[out] nz the number of grid points along the Z axis
*/
void
getLJPMEParametersInContext
(
const
Context
&
context
,
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
/**
* Add the nonbonded force parameters for a particle. This should be called once for each particle
* Add the nonbonded force parameters for a particle. This should be called once for each particle
* in the System. When it is called for the i'th time, it specifies the parameters for the i'th particle.
* in the System. When it is called for the i'th time, it specifies the parameters for the i'th particle.
...
@@ -382,9 +420,9 @@ private:
...
@@ -382,9 +420,9 @@ private:
class
ParticleInfo
;
class
ParticleInfo
;
class
ExceptionInfo
;
class
ExceptionInfo
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
double
cutoffDistance
,
switchingDistance
,
rfDielectric
,
ewaldErrorTol
,
alpha
;
double
cutoffDistance
,
switchingDistance
,
rfDielectric
,
ewaldErrorTol
,
alpha
,
dalpha
;
bool
useSwitchingFunction
,
useDispersionCorrection
;
bool
useSwitchingFunction
,
useDispersionCorrection
;
int
recipForceGroup
,
nx
,
ny
,
nz
;
int
recipForceGroup
,
nx
,
ny
,
nz
,
dnx
,
dny
,
dnz
;
void
addExclusionsToSet
(
const
std
::
vector
<
std
::
set
<
int
>
>&
bonded12
,
std
::
set
<
int
>&
exclusions
,
int
baseParticle
,
int
fromParticle
,
int
currentLevel
)
const
;
void
addExclusionsToSet
(
const
std
::
vector
<
std
::
set
<
int
>
>&
bonded12
,
std
::
set
<
int
>&
exclusions
,
int
baseParticle
,
int
fromParticle
,
int
currentLevel
)
const
;
std
::
vector
<
ParticleInfo
>
particles
;
std
::
vector
<
ParticleInfo
>
particles
;
std
::
vector
<
ExceptionInfo
>
exceptions
;
std
::
vector
<
ExceptionInfo
>
exceptions
;
...
...
openmmapi/include/openmm/internal/NonbondedForceImpl.h
View file @
a9f65649
...
@@ -65,6 +65,7 @@ public:
...
@@ -65,6 +65,7 @@ public:
std
::
vector
<
std
::
string
>
getKernelNames
();
std
::
vector
<
std
::
string
>
getKernelNames
();
void
updateParametersInContext
(
ContextImpl
&
context
);
void
updateParametersInContext
(
ContextImpl
&
context
);
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
/**
* This is a utility routine that calculates the values to use for alpha and kmax when using
* This is a utility routine that calculates the values to use for alpha and kmax when using
* Ewald summation.
* Ewald summation.
...
@@ -74,7 +75,7 @@ public:
...
@@ -74,7 +75,7 @@ public:
* This is a utility routine that calculates the values to use for alpha and grid size when using
* This is a utility routine that calculates the values to use for alpha and grid size when using
* Particle Mesh Ewald.
* Particle Mesh Ewald.
*/
*/
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
);
static
void
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
);
/**
/**
* Compute the coefficient which, when divided by the periodic box volume, gives the
* Compute the coefficient which, when divided by the periodic box volume, gives the
* long range dispersion correction to the energy.
* long range dispersion correction to the energy.
...
...
openmmapi/src/NonbondedForce.cpp
View file @
a9f65649
...
@@ -106,6 +106,13 @@ void NonbondedForce::getPMEParameters(double& alpha, int& nx, int& ny, int& nz)
...
@@ -106,6 +106,13 @@ void NonbondedForce::getPMEParameters(double& alpha, int& nx, int& ny, int& nz)
nz
=
this
->
nz
;
nz
=
this
->
nz
;
}
}
void
NonbondedForce
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
alpha
=
this
->
dalpha
;
nx
=
this
->
dnx
;
ny
=
this
->
dny
;
nz
=
this
->
dnz
;
}
void
NonbondedForce
::
setPMEParameters
(
double
alpha
,
int
nx
,
int
ny
,
int
nz
)
{
void
NonbondedForce
::
setPMEParameters
(
double
alpha
,
int
nx
,
int
ny
,
int
nz
)
{
this
->
alpha
=
alpha
;
this
->
alpha
=
alpha
;
this
->
nx
=
nx
;
this
->
nx
=
nx
;
...
@@ -113,10 +120,21 @@ void NonbondedForce::setPMEParameters(double alpha, int nx, int ny, int nz) {
...
@@ -113,10 +120,21 @@ void NonbondedForce::setPMEParameters(double alpha, int nx, int ny, int nz) {
this
->
nz
=
nz
;
this
->
nz
=
nz
;
}
}
void
NonbondedForce
::
setLJPMEParameters
(
double
alpha
,
int
nx
,
int
ny
,
int
nz
)
{
this
->
dalpha
=
alpha
;
this
->
dnx
=
nx
;
this
->
dny
=
ny
;
this
->
dnz
=
nz
;
}
void
NonbondedForce
::
getPMEParametersInContext
(
const
Context
&
context
,
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
void
NonbondedForce
::
getPMEParametersInContext
(
const
Context
&
context
,
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
NonbondedForceImpl
&>
(
getImplInContext
(
context
)).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
dynamic_cast
<
const
NonbondedForceImpl
&>
(
getImplInContext
(
context
)).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
}
void
NonbondedForce
::
getLJPMEParametersInContext
(
const
Context
&
context
,
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
NonbondedForceImpl
&>
(
getImplInContext
(
context
)).
getLJPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
int
NonbondedForce
::
addParticle
(
double
charge
,
double
sigma
,
double
epsilon
)
{
int
NonbondedForce
::
addParticle
(
double
charge
,
double
sigma
,
double
epsilon
)
{
particles
.
push_back
(
ParticleInfo
(
charge
,
sigma
,
epsilon
));
particles
.
push_back
(
ParticleInfo
(
charge
,
sigma
,
epsilon
));
return
particles
.
size
()
-
1
;
return
particles
.
size
()
-
1
;
...
...
openmmapi/src/NonbondedForceImpl.cpp
View file @
a9f65649
...
@@ -151,8 +151,12 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
...
@@ -151,8 +151,12 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
kmaxz
++
;
kmaxz
++
;
}
}
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
)
{
void
NonbondedForceImpl
::
calcPMEParameters
(
const
System
&
system
,
const
NonbondedForce
&
force
,
double
&
alpha
,
int
&
xsize
,
int
&
ysize
,
int
&
zsize
,
bool
LJ
)
{
force
.
getPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
if
(
LJ
)
{
force
.
getLJPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
else
{
force
.
getPMEParameters
(
alpha
,
xsize
,
ysize
,
zsize
);
}
if
(
alpha
==
0.0
)
{
if
(
alpha
==
0.0
)
{
Vec3
boxVectors
[
3
];
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
...
@@ -283,3 +287,7 @@ void NonbondedForceImpl::updateParametersInContext(ContextImpl& context) {
...
@@ -283,3 +287,7 @@ void NonbondedForceImpl::updateParametersInContext(ContextImpl& context) {
void
NonbondedForceImpl
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
void
NonbondedForceImpl
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
kernel
.
getAs
<
CalcNonbondedForceKernel
>
().
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
kernel
.
getAs
<
CalcNonbondedForceKernel
>
().
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
}
void
NonbondedForceImpl
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
kernel
.
getAs
<
CalcNonbondedForceKernel
>
().
getLJPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
platforms/cpu/include/CpuKernels.h
View file @
a9f65649
...
@@ -251,27 +251,37 @@ public:
...
@@ -251,27 +251,37 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
NonbondedForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
NonbondedForce
&
force
);
/**
/**
* Get the parameters being used for PME.
* Get the parameters being used for PME.
*
*
* @param alpha the separation parameter
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
private:
class
PmeIO
;
class
PmeIO
;
CpuPlatform
::
PlatformData
&
data
;
CpuPlatform
::
PlatformData
&
data
;
int
numParticles
,
num14
;
int
numParticles
,
num14
;
int
**
bonded14IndexArray
;
int
**
bonded14IndexArray
;
double
**
bonded14ParamArray
;
double
**
bonded14ParamArray
;
double
nonbondedCutoff
,
switchingDistance
,
rfDielectric
,
ewaldAlpha
,
ewaldSelfEnergy
,
dispersionCoefficient
;
double
nonbondedCutoff
,
switchingDistance
,
rfDielectric
,
ewaldAlpha
,
ewaldDispersionAlpha
,
ewaldSelfEnergy
,
dispersionCoefficient
;
int
kmax
[
3
],
gridSize
[
3
];
int
kmax
[
3
],
gridSize
[
3
]
,
dispersionGridSize
[
3
]
;
bool
useSwitchingFunction
,
useOptimizedPme
,
hasInitializedPme
;
bool
useSwitchingFunction
,
useOptimizedPme
,
hasInitializedPme
,
hasInitializedDispersionPme
;
std
::
vector
<
std
::
set
<
int
>
>
exclusions
;
std
::
vector
<
std
::
set
<
int
>
>
exclusions
;
std
::
vector
<
std
::
pair
<
float
,
float
>
>
particleParams
;
std
::
vector
<
std
::
pair
<
float
,
float
>
>
particleParams
;
std
::
vector
<
float
>
C6params
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
CpuNonbondedForce
*
nonbonded
;
CpuNonbondedForce
*
nonbonded
;
Kernel
optimizedPme
;
Kernel
optimizedPme
,
optimizedDispersionPme
;
CpuBondForce
bondForce
;
CpuBondForce
bondForce
;
};
};
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
a9f65649
...
@@ -104,16 +104,27 @@ class CpuNonbondedForce {
...
@@ -104,16 +104,27 @@ class CpuNonbondedForce {
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
setUsePME
(
float
alpha
,
int
meshSize
[
3
]);
void
setUsePME
(
float
alpha
,
int
meshSize
[
3
]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void
setUseLJPME
(
float
alpha
,
int
meshSize
[
3
]);
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Calculate Ewald ixn
Calculate Ewald ixn
...
@@ -122,16 +133,17 @@ class CpuNonbondedForce {
...
@@ -122,16 +133,17 @@ class CpuNonbondedForce {
@param posq atom coordinates and charges
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME)
@param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param C6Paramrs C6 parameters for multiplicative representation of dispersion
@param exclusions atom exclusion indices
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param forces force array (forces added)
@param totalEnergy total energy
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusion
s
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
float
>
&
C6param
s
,
std
::
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
;
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
;
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -150,7 +162,7 @@ class CpuNonbondedForce {
...
@@ -150,7 +162,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
const
std
::
vector
<
float
>&
C6params
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
);
/**
/**
* This routine contains the code executed by each thread.
* This routine contains the code executed by each thread.
...
@@ -163,28 +175,32 @@ protected:
...
@@ -163,28 +175,32 @@ protected:
bool
periodic
;
bool
periodic
;
bool
triclinic
;
bool
triclinic
;
bool
ewald
;
bool
ewald
;
bool
pme
;
bool
ljpme
,
pme
;
bool
tableIsValid
;
bool
tableIsValid
,
expTableIsValid
;
const
CpuNeighborList
*
neighborList
;
const
CpuNeighborList
*
neighborList
;
float
recipBoxSize
[
3
];
float
recipBoxSize
[
3
];
RealVec
periodicBoxVectors
[
3
];
RealVec
periodicBoxVectors
[
3
];
AlignedArray
<
fvec4
>
periodicBoxVec4
;
AlignedArray
<
fvec4
>
periodicBoxVec4
;
float
cutoffDistance
,
switchingDistance
;
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
krf
,
crf
;
float
alphaEwald
;
float
alphaEwald
,
alphaDispersionEwald
;
int
numRx
,
numRy
,
numRz
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
int
meshDim
[
3
]
,
dispersionMeshDim
[
3
]
;
std
::
vector
<
float
>
erfcTable
,
ewaldScaleTable
;
std
::
vector
<
float
>
erfcTable
,
ewaldScaleTable
;
float
ewaldDX
,
ewaldDXInv
,
erfcDXInv
;
std
::
vector
<
float
>
exptermsTable
,
dExptermsTable
;
float
ewaldDX
,
ewaldDXInv
,
erfcDXInv
,
exptermsDX
,
exptermsDXInv
;
std
::
vector
<
double
>
threadEnergy
;
std
::
vector
<
double
>
threadEnergy
;
// The following variables are used to make information accessible to the individual threads.
// The following variables are used to make information accessible to the individual threads.
int
numberOfAtoms
;
int
numberOfAtoms
;
float
*
posq
;
float
*
posq
;
RealVec
const
*
atomCoordinates
;
RealVec
const
*
atomCoordinates
;
std
::
pair
<
float
,
float
>
const
*
atomParameters
;
std
::
pair
<
float
,
float
>
const
*
atomParameters
;
float
const
*
C6params
;
std
::
set
<
int
>
const
*
exclusions
;
std
::
set
<
int
>
const
*
exclusions
;
std
::
vector
<
AlignedArray
<
float
>
>*
threadForce
;
std
::
vector
<
AlignedArray
<
float
>
>*
threadForce
;
bool
includeEnergy
;
bool
includeEnergy
;
float
inverseRcut6
;
float
inverseRcut6Expterm
;
void
*
atomicCounter
;
void
*
atomicCounter
;
static
const
float
TWO_OVER_SQRT_PI
;
static
const
float
TWO_OVER_SQRT_PI
;
...
@@ -238,10 +254,29 @@ protected:
...
@@ -238,10 +254,29 @@ protected:
*/
*/
void
tabulateEwaldScaleFactor
();
void
tabulateEwaldScaleFactor
();
/**
* Create a lookup table for the scale factor used with dispersion PME.
*/
void
tabulateExpTerms
();
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
*/
*/
float
erfcApprox
(
float
x
);
float
erfcApprox
(
float
x
);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
float
exptermsApprox
(
float
R
);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
float
dExptermsApprox
(
float
R
);
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/include/CpuNonbondedForceVec4.h
View file @
a9f65649
...
@@ -88,11 +88,25 @@ protected:
...
@@ -88,11 +88,25 @@ protected:
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
*/
*/
fvec4
erfcApprox
(
const
fvec4
&
x
);
fvec4
erfcApprox
(
const
fvec4
&
x
);
/**
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
*/
fvec4
ewaldScaleFunction
(
const
fvec4
&
x
);
fvec4
ewaldScaleFunction
(
const
fvec4
&
x
);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec4
exptermsApprox
(
const
fvec4
&
R
);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec4
dExptermsApprox
(
const
fvec4
&
R
);
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/include/CpuNonbondedForceVec8.h
View file @
a9f65649
...
@@ -92,6 +92,21 @@ protected:
...
@@ -92,6 +92,21 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
*/
fvec8
ewaldScaleFunction
(
const
fvec8
&
x
);
fvec8
ewaldScaleFunction
(
const
fvec8
&
x
);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec8
exptermsApprox
(
const
fvec8
&
R
);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec8
dExptermsApprox
(
const
fvec8
&
R
);
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/src/CpuKernels.cpp
View file @
a9f65649
...
@@ -50,6 +50,7 @@
...
@@ -50,6 +50,7 @@
#include "lepton/CustomFunction.h"
#include "lepton/CustomFunction.h"
#include "lepton/Operation.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h"
#include "lepton/ParsedExpression.h"
using
namespace
OpenMM
;
using
namespace
OpenMM
;
...
@@ -528,7 +529,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
...
@@ -528,7 +529,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
CpuCalcNonbondedForceKernel
::
CpuCalcNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CpuCalcNonbondedForceKernel
::
CpuCalcNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
),
nonbonded
(
NULL
)
{
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
),
hasInitializedDispersionPme
(
false
),
nonbonded
(
NULL
)
{
if
(
isVec8Supported
())
if
(
isVec8Supported
())
nonbonded
=
createCpuNonbondedForceVec8
();
nonbonded
=
createCpuNonbondedForceVec8
();
else
else
...
@@ -575,12 +576,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
...
@@ -575,12 +576,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
bonded14ParamArray
[
i
]
=
new
double
[
3
];
bonded14ParamArray
[
i
]
=
new
double
[
3
];
particleParams
.
resize
(
numParticles
);
particleParams
.
resize
(
numParticles
);
C6params
.
resize
(
numParticles
);
double
sumSquaredCharges
=
0.0
;
double
sumSquaredCharges
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
double
charge
,
radius
,
depth
;
double
charge
,
radius
,
depth
;
force
.
getParticleParameters
(
i
,
charge
,
radius
,
depth
);
force
.
getParticleParameters
(
i
,
charge
,
radius
,
depth
);
data
.
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
data
.
posq
[
4
*
i
+
3
]
=
(
float
)
charge
;
particleParams
[
i
]
=
make_pair
((
float
)
(
0.5
*
radius
),
(
float
)
(
2.0
*
sqrt
(
depth
)));
particleParams
[
i
]
=
make_pair
((
float
)
(
0.5
*
radius
),
(
float
)
(
2.0
*
sqrt
(
depth
)));
C6params
[
i
]
=
8.0
*
pow
(
particleParams
[
i
].
first
,
3.0
)
*
particleParams
[
i
].
second
;
sumSquaredCharges
+=
charge
*
charge
;
sumSquaredCharges
+=
charge
*
charge
;
}
}
...
@@ -616,19 +619,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
...
@@ -616,19 +619,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
}
}
else
if
(
nonbondedMethod
==
PME
)
{
else
if
(
nonbondedMethod
==
PME
)
{
double
alpha
;
double
alpha
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
]);
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
]
,
false
);
ewaldAlpha
=
alpha
;
ewaldAlpha
=
alpha
;
}
}
if
(
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
)
else
if
(
nonbondedMethod
==
LJPME
)
{
double
alpha
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
],
false
);
ewaldAlpha
=
(
RealOpenMM
)
alpha
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
dispersionGridSize
[
0
],
dispersionGridSize
[
1
],
dispersionGridSize
[
2
],
true
);
ewaldDispersionAlpha
=
(
RealOpenMM
)
alpha
;
useSwitchingFunction
=
false
;
}
if
(
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
||
nonbondedMethod
==
LJPME
)
{
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
ewaldAlpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
ewaldAlpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
else
if
(
nonbondedMethod
==
LJPME
){
for
(
int
atom
=
0
;
atom
<
numParticles
;
atom
++
)
{
// Dispersion self term
ewaldSelfEnergy
+=
pow
(
ewaldDispersionAlpha
,
6.0
)
*
C6params
[
atom
]
*
C6params
[
atom
]
/
12.0
;
}
}
}
else
{
ewaldSelfEnergy
=
0.0
;
ewaldSelfEnergy
=
0.0
;
}
rfDielectric
=
force
.
getReactionFieldDielectric
();
rfDielectric
=
force
.
getReactionFieldDielectric
();
if
(
force
.
getUseDispersionCorrection
())
if
(
force
.
getUseDispersionCorrection
())
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
else
else
dispersionCoefficient
=
0.0
;
dispersionCoefficient
=
0.0
;
data
.
isPeriodic
=
(
nonbondedMethod
==
CutoffPeriodic
||
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
);
data
.
isPeriodic
=
(
nonbondedMethod
==
CutoffPeriodic
||
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
||
nonbondedMethod
==
LJPME
);
}
}
double
CpuCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
double
CpuCalcNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
{
...
@@ -646,6 +665,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -646,6 +665,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
],
numParticles
,
ewaldAlpha
);
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
],
numParticles
,
ewaldAlpha
);
}
}
}
}
if
(
nonbondedMethod
==
LJPME
)
{
// If available, use the optimized PME implementation.
vector
<
string
>
kernelNames
;
kernelNames
.
push_back
(
"CalcPmeReciprocalForce"
);
useOptimizedPme
=
getPlatform
().
supportsKernels
(
kernelNames
);
if
(
useOptimizedPme
)
{
optimizedPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
context
);
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
],
numParticles
,
ewaldAlpha
);
optimizedDispersionPme
=
getPlatform
().
createKernel
(
CalcDispersionPmeReciprocalForceKernel
::
Name
(),
context
);
optimizedDispersionPme
.
getAs
<
CalcDispersionPmeReciprocalForceKernel
>
().
initialize
(
dispersionGridSize
[
0
],
dispersionGridSize
[
1
],
dispersionGridSize
[
2
],
numParticles
,
ewaldDispersionAlpha
);
}
}
}
}
AlignedArray
<
float
>&
posq
=
data
.
posq
;
AlignedArray
<
float
>&
posq
=
data
.
posq
;
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
...
@@ -654,6 +687,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -654,6 +687,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
pme
=
(
nonbondedMethod
==
PME
);
bool
pme
=
(
nonbondedMethod
==
PME
);
bool
ljpme
=
(
nonbondedMethod
==
LJPME
);
if
(
nonbondedMethod
!=
NoCutoff
)
if
(
nonbondedMethod
!=
NoCutoff
)
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
data
.
neighborList
,
rfDielectric
);
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
data
.
neighborList
,
rfDielectric
);
if
(
data
.
isPeriodic
)
{
if
(
data
.
isPeriodic
)
{
...
@@ -669,9 +703,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -669,9 +703,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded
->
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
->
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
useSwitchingFunction
)
if
(
useSwitchingFunction
)
nonbonded
->
setUseSwitchingFunction
(
switchingDistance
);
nonbonded
->
setUseSwitchingFunction
(
switchingDistance
);
if
(
ljpme
){
nonbonded
->
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
->
setUseLJPME
(
ewaldDispersionAlpha
,
dispersionGridSize
);
}
double
nonbondedEnergy
=
0
;
double
nonbondedEnergy
=
0
;
if
(
includeDirect
)
if
(
includeDirect
)
nonbonded
->
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
nonbonded
->
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
C6params
,
exclusions
,
data
.
threadForce
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
data
.
threads
);
if
(
includeReciprocal
)
{
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
PmeIO
io
(
&
posq
[
0
],
&
data
.
threadForce
[
0
][
0
],
numParticles
);
...
@@ -680,13 +718,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -680,13 +718,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
nonbondedEnergy
+=
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
}
else
else
nonbonded
->
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
->
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
C6params
,
exclusions
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
}
}
energy
+=
nonbondedEnergy
;
energy
+=
nonbondedEnergy
;
if
(
includeDirect
)
{
if
(
includeDirect
)
{
ReferenceLJCoulomb14
nonbonded14
;
ReferenceLJCoulomb14
nonbonded14
;
bondForce
.
calculateForce
(
posData
,
bonded14ParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
nonbonded14
);
bondForce
.
calculateForce
(
posData
,
bonded14ParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
nonbonded14
);
if
(
data
.
isPeriodic
)
if
(
data
.
isPeriodic
&&
nonbondedMethod
!=
LJPME
)
energy
+=
dispersionCoefficient
/
(
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
]);
energy
+=
dispersionCoefficient
/
(
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
]);
}
}
return
energy
;
return
energy
;
...
@@ -739,7 +777,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -739,7 +777,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
}
}
void
CpuCalcNonbondedForceKernel
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
void
CpuCalcNonbondedForceKernel
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
if
(
nonbondedMethod
!=
PME
)
if
(
nonbondedMethod
!=
PME
&&
nonbondedMethod
!=
LJPME
)
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using PME"
);
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using PME"
);
if
(
useOptimizedPme
)
if
(
useOptimizedPme
)
optimizedPme
.
getAs
<
const
CalcPmeReciprocalForceKernel
>
().
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
optimizedPme
.
getAs
<
const
CalcPmeReciprocalForceKernel
>
().
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
...
@@ -751,6 +789,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
...
@@ -751,6 +789,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
}
}
}
void
CpuCalcNonbondedForceKernel
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
if
(
nonbondedMethod
!=
LJPME
)
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using PME"
);
if
(
useOptimizedPme
)
optimizedDispersionPme
.
getAs
<
const
CalcPmeReciprocalForceKernel
>
().
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
else
{
alpha
=
ewaldDispersionAlpha
;
nx
=
dispersionGridSize
[
0
];
ny
=
dispersionGridSize
[
1
];
nz
=
dispersionGridSize
[
2
];
}
}
CpuCalcCustomNonbondedForceKernel
::
CpuCalcCustomNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CpuCalcCustomNonbondedForceKernel
::
CpuCalcCustomNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
forceCopy
(
NULL
),
nonbonded
(
NULL
)
{
CalcCustomNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
forceCopy
(
NULL
),
nonbonded
(
NULL
)
{
}
}
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
a9f65649
...
@@ -30,6 +30,7 @@
...
@@ -30,6 +30,7 @@
#include "ReferencePME.h"
#include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h"
#include "openmm/internal/gmx_atomic.h"
#include <algorithm>
#include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
// make sure that erf() and erfc() are defined.
...
@@ -57,7 +58,8 @@ public:
...
@@ -57,7 +58,8 @@ public:
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
tableIsValid
(
false
),
cutoffDistance
(
0.0
f
),
alphaEwald
(
0.0
f
)
{
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
),
ljpme
(
false
),
tableIsValid
(
false
),
expTableIsValid
(
false
),
cutoffDistance
(
0.0
f
),
alphaDispersionEwald
(
0.0
f
),
alphaEwald
(
0.0
f
)
{
}
}
CpuNonbondedForce
::~
CpuNonbondedForce
()
{
CpuNonbondedForce
::~
CpuNonbondedForce
()
{
...
@@ -78,10 +80,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
...
@@ -78,10 +80,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid
=
false
;
tableIsValid
=
false
;
cutoff
=
true
;
cutoff
=
true
;
cutoffDistance
=
distance
;
cutoffDistance
=
distance
;
inverseRcut6
=
pow
(
cutoffDistance
,
-
6
);
neighborList
=
&
neighbors
;
neighborList
=
&
neighbors
;
krf
=
pow
(
cutoffDistance
,
-
3.0
f
)
*
(
solventDielectric
-
1.0
)
/
(
2.0
*
solventDielectric
+
1.0
);
krf
=
pow
(
cutoffDistance
,
-
3.0
f
)
*
(
solventDielectric
-
1.0
)
/
(
2.0
*
solventDielectric
+
1.0
);
crf
=
(
1.0
/
cutoffDistance
)
*
(
3.0
*
solventDielectric
)
/
(
2.0
*
solventDielectric
+
1.0
);
crf
=
(
1.0
/
cutoffDistance
)
*
(
3.0
*
solventDielectric
)
/
(
2.0
*
solventDielectric
+
1.0
);
}
if
(
alphaDispersionEwald
!=
0.0
f
){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double
dalphaR
=
alphaDispersionEwald
*
cutoffDistance
;
double
dar2
=
dalphaR
*
dalphaR
;
double
dar4
=
dar2
*
dar2
;
double
dar6
=
dar4
*
dar2
;
double
expterm
=
EXP
(
-
dar2
);
inverseRcut6Expterm
=
inverseRcut6
*
(
1.0
-
expterm
*
(
1.0
+
dar2
+
0.5
*
dar4
));
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -96,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
...
@@ -96,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance
=
distance
;
switchingDistance
=
distance
;
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
also been set, and the smallest side of the periodic box is at least twice the cutoff
...
@@ -106,7 +119,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
...
@@ -106,7 +119,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setPeriodic
(
RealVec
*
periodicBoxVectors
)
{
void
CpuNonbondedForce
::
setPeriodic
(
RealVec
*
periodicBoxVectors
)
{
assert
(
cutoff
);
assert
(
cutoff
);
assert
(
periodicBoxVectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
...
@@ -124,11 +137,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
...
@@ -124,11 +137,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
periodicBoxVec4
[
1
]
=
fvec4
(
periodicBoxVectors
[
1
][
0
],
periodicBoxVectors
[
1
][
1
],
periodicBoxVectors
[
1
][
2
],
0
);
periodicBoxVec4
[
1
]
=
fvec4
(
periodicBoxVectors
[
1
][
0
],
periodicBoxVectors
[
1
][
1
],
periodicBoxVectors
[
1
][
2
],
0
);
periodicBoxVec4
[
2
]
=
fvec4
(
periodicBoxVectors
[
2
][
0
],
periodicBoxVectors
[
2
][
1
],
periodicBoxVectors
[
2
][
2
],
0
);
periodicBoxVec4
[
2
]
=
fvec4
(
periodicBoxVectors
[
2
][
0
],
periodicBoxVectors
[
2
][
1
],
periodicBoxVectors
[
2
][
2
],
0
);
triclinic
=
(
periodicBoxVectors
[
0
][
1
]
!=
0.0
||
periodicBoxVectors
[
0
][
2
]
!=
0.0
||
triclinic
=
(
periodicBoxVectors
[
0
][
1
]
!=
0.0
||
periodicBoxVectors
[
0
][
2
]
!=
0.0
||
periodicBoxVectors
[
1
][
0
]
!=
0.0
||
periodicBoxVectors
[
1
][
2
]
!=
0.0
||
periodicBoxVectors
[
1
][
0
]
!=
0.0
||
periodicBoxVectors
[
1
][
2
]
!=
0.0
||
periodicBoxVectors
[
2
][
0
]
!=
0.0
||
periodicBoxVectors
[
2
][
1
]
!=
0.0
);
periodicBoxVectors
[
2
][
0
]
!=
0.0
||
periodicBoxVectors
[
2
][
1
]
!=
0.0
);
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
Set the force to use Ewald summation.
...
@@ -139,18 +152,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
...
@@ -139,18 +152,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
void
CpuNonbondedForce
::
setUseEwald
(
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
if
(
alpha
!=
alphaEwald
)
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
tableIsValid
=
false
;
alphaEwald
=
alpha
;
alphaEwald
=
alpha
;
numRx
=
kmaxx
;
numRx
=
kmaxx
;
numRy
=
kmaxy
;
numRy
=
kmaxy
;
numRz
=
kmaxz
;
numRz
=
kmaxz
;
ewald
=
true
;
ewald
=
true
;
tabulateEwaldScaleFactor
();
tabulateEwaldScaleFactor
();
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
Set the force to use Particle-Mesh Ewald (PME) summation.
...
@@ -159,19 +172,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
...
@@ -159,19 +172,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUsePME
(
float
alpha
,
int
meshSize
[
3
])
{
void
CpuNonbondedForce
::
setUsePME
(
float
alpha
,
int
meshSize
[
3
])
{
if
(
alpha
!=
alphaEwald
)
if
(
alpha
!=
alphaEwald
)
tableIsValid
=
false
;
tableIsValid
=
false
;
alphaEwald
=
alpha
;
alphaEwald
=
alpha
;
meshDim
[
0
]
=
meshSize
[
0
];
meshDim
[
0
]
=
meshSize
[
0
];
meshDim
[
1
]
=
meshSize
[
1
];
meshDim
[
1
]
=
meshSize
[
1
];
meshDim
[
2
]
=
meshSize
[
2
];
meshDim
[
2
]
=
meshSize
[
2
];
pme
=
true
;
pme
=
true
;
tabulateEwaldScaleFactor
();
tabulateEwaldScaleFactor
();
}
}
void
CpuNonbondedForce
::
tabulateEwaldScaleFactor
()
{
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseLJPME
(
float
alpha
,
int
meshSize
[
3
])
{
if
(
alpha
!=
alphaDispersionEwald
)
expTableIsValid
=
false
;
alphaDispersionEwald
=
alpha
;
dispersionMeshDim
[
0
]
=
meshSize
[
0
];
dispersionMeshDim
[
1
]
=
meshSize
[
1
];
dispersionMeshDim
[
2
]
=
meshSize
[
2
];
ljpme
=
true
;
tabulateExpTerms
();
if
(
cutoffDistance
!=
0.0
f
){
// We set this here, in case setUseLJPME is called after the cutoff is set
double
dalphaR
=
alphaDispersionEwald
*
cutoffDistance
;
double
dar2
=
dalphaR
*
dalphaR
;
double
dar4
=
dar2
*
dar2
;
double
dar6
=
dar4
*
dar2
;
double
expterm
=
EXP
(
-
dar2
);
inverseRcut6Expterm
=
inverseRcut6
*
(
1.0
-
expterm
*
(
1.0
+
dar2
+
0.5
*
dar4
));
}
}
void
CpuNonbondedForce
::
tabulateEwaldScaleFactor
()
{
if
(
tableIsValid
)
if
(
tableIsValid
)
return
;
return
;
tableIsValid
=
true
;
tableIsValid
=
true
;
...
@@ -187,10 +230,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
...
@@ -187,10 +230,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
ewaldScaleTable
[
i
]
=
erfcTable
[
i
]
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
ewaldScaleTable
[
i
]
=
erfcTable
[
i
]
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
}
}
}
void
CpuNonbondedForce
::
tabulateExpTerms
()
{
if
(
expTableIsValid
)
return
;
expTableIsValid
=
true
;
exptermsDX
=
cutoffDistance
/
NUM_TABLE_POINTS
;
exptermsDXInv
=
1.0
f
/
exptermsDX
;
exptermsTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
dExptermsTable
.
resize
(
NUM_TABLE_POINTS
+
4
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
4
;
i
++
)
{
double
r
=
i
*
ewaldDX
;
double
dalphaR
=
alphaDispersionEwald
*
r
;
double
dar2
=
dalphaR
*
dalphaR
;
double
dar4
=
dar2
*
dar2
;
double
dar6
=
dar4
*
dar2
;
double
expterm
=
EXP
(
-
dar2
);
exptermsTable
[
i
]
=
(
1.0
-
expterm
*
(
1.0
+
dar2
+
0.5
*
dar4
));
dExptermsTable
[
i
]
=
(
1.0
-
expterm
*
(
1.0
+
dar2
+
0.5
*
dar4
+
dar6
/
6.0
));
}
}
void
CpuNonbondedForce
::
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
void
CpuNonbondedForce
::
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
float
>
&
C6params
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
{
vector
<
RealVec
>&
forces
,
double
*
totalEnergy
)
const
{
typedef
std
::
complex
<
float
>
d_complex
;
typedef
std
::
complex
<
float
>
d_complex
;
static
const
float
epsilon
=
1.0
;
static
const
float
epsilon
=
1.0
;
...
@@ -211,6 +274,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
...
@@ -211,6 +274,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if
(
totalEnergy
)
if
(
totalEnergy
)
*
totalEnergy
+=
recipEnergy
;
*
totalEnergy
+=
recipEnergy
;
pme_destroy
(
pmedata
);
pme_destroy
(
pmedata
);
if
(
ljpme
)
{
// Dispersion reciprocal space terms
pme_init
(
&
pmedata
,
alphaDispersionEwald
,
numberOfAtoms
,
dispersionMeshDim
,
5
,
1
);
std
::
vector
<
RealVec
>
dpmeforces
;
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
){
charges
[
i
]
=
(
RealOpenMM
)
C6params
[
i
];
dpmeforces
.
push_back
(
RealVec
());
}
RealOpenMM
recipDispersionEnergy
=
0.0
;
pme_exec_dpme
(
pmedata
,
atomCoordinates
,
dpmeforces
,
charges
,
periodicBoxVectors
,
&
recipDispersionEnergy
);
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
){
forces
[
i
][
0
]
-=
2.0
*
dpmeforces
[
i
][
0
];
forces
[
i
][
1
]
-=
2.0
*
dpmeforces
[
i
][
1
];
forces
[
i
][
2
]
-=
2.0
*
dpmeforces
[
i
][
2
];
}
if
(
totalEnergy
)
*
totalEnergy
+=
recipDispersionEnergy
;
pme_destroy
(
pmedata
);
}
}
}
// Ewald method
// Ewald method
...
@@ -224,7 +310,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
...
@@ -224,7 +310,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector
<
d_complex
>
eir
(
kmax
*
numberOfAtoms
*
3
);
vector
<
d_complex
>
eir
(
kmax
*
numberOfAtoms
*
3
);
vector
<
d_complex
>
tab_xy
(
numberOfAtoms
);
vector
<
d_complex
>
tab_xy
(
numberOfAtoms
);
vector
<
d_complex
>
tab_qxyz
(
numberOfAtoms
);
vector
<
d_complex
>
tab_qxyz
(
numberOfAtoms
);
...
@@ -232,15 +318,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
...
@@ -232,15 +318,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
for
(
int
i
=
0
;
(
i
<
numberOfAtoms
);
i
++
)
{
for
(
int
i
=
0
;
(
i
<
numberOfAtoms
);
i
++
)
{
float
*
pos
=
posq
+
4
*
i
;
float
*
pos
=
posq
+
4
*
i
;
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
0
,
i
,
m
)
=
d_complex
(
1
,
0
);
EIR
(
0
,
i
,
m
)
=
d_complex
(
1
,
0
);
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
1
,
i
,
m
)
=
d_complex
(
cos
(
pos
[
m
]
*
recipBoxSize
[
m
]),
EIR
(
1
,
i
,
m
)
=
d_complex
(
cos
(
pos
[
m
]
*
recipBoxSize
[
m
]),
sin
(
pos
[
m
]
*
recipBoxSize
[
m
]));
sin
(
pos
[
m
]
*
recipBoxSize
[
m
]));
for
(
int
j
=
2
;
(
j
<
kmax
);
j
++
)
for
(
int
j
=
2
;
(
j
<
kmax
);
j
++
)
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
for
(
int
m
=
0
;
(
m
<
3
);
m
++
)
EIR
(
j
,
i
,
m
)
=
EIR
(
j
-
1
,
i
,
m
)
*
EIR
(
1
,
i
,
m
);
EIR
(
j
,
i
,
m
)
=
EIR
(
j
-
1
,
i
,
m
)
*
EIR
(
1
,
i
,
m
);
}
}
// calculate reciprocal space energy and forces
// calculate reciprocal space energy and forces
...
@@ -254,11 +340,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
...
@@ -254,11 +340,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
float
ky
=
ry
*
recipBoxSize
[
1
];
float
ky
=
ry
*
recipBoxSize
[
1
];
if
(
ry
>=
0
)
{
if
(
ry
>=
0
)
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
EIR
(
ry
,
n
,
1
);
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
EIR
(
ry
,
n
,
1
);
}
}
else
{
else
{
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
conj
(
EIR
(
-
ry
,
n
,
1
));
tab_xy
[
n
]
=
EIR
(
rx
,
n
,
0
)
*
conj
(
EIR
(
-
ry
,
n
,
1
));
}
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
if
(
rz
>=
0
)
{
if
(
rz
>=
0
)
{
...
@@ -301,13 +387,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
...
@@ -301,13 +387,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
RealVec
>&
atomCoordinates
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
const
vector
<
float
>&
C6params
,
const
vector
<
set
<
int
>
>&
exclusions
,
vector
<
AlignedArray
<
float
>
>&
threadForce
,
double
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
numberOfAtoms
=
numberOfAtoms
;
this
->
posq
=
posq
;
this
->
posq
=
posq
;
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
atomCoordinates
=
&
atomCoordinates
[
0
];
this
->
atomParameters
=
&
atomParameters
[
0
];
this
->
atomParameters
=
&
atomParameters
[
0
];
this
->
C6params
=
&
C6params
[
0
];
this
->
exclusions
=
&
exclusions
[
0
];
this
->
exclusions
=
&
exclusions
[
0
];
this
->
threadForce
=
&
threadForce
;
this
->
threadForce
=
&
threadForce
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
includeEnergy
=
(
totalEnergy
!=
NULL
);
...
@@ -319,7 +406,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
...
@@ -319,7 +406,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish.
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask
task
(
*
this
);
ComputeDirectTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
execute
(
task
);
// ACS calls threadcomputedirect
threads
.
waitForThreads
();
threads
.
waitForThreads
();
// Signal the threads to subtract the exclusions.
// Signal the threads to subtract the exclusions.
...
@@ -350,9 +437,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
...
@@ -350,9 +437,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float
*
forces
=
&
(
*
threadForce
)[
threadIndex
][
0
];
float
*
forces
=
&
(
*
threadForce
)[
threadIndex
][
0
];
fvec4
boxSize
(
periodicBoxVectors
[
0
][
0
],
periodicBoxVectors
[
1
][
1
],
periodicBoxVectors
[
2
][
2
],
0
);
fvec4
boxSize
(
periodicBoxVectors
[
0
][
0
],
periodicBoxVectors
[
1
][
1
],
periodicBoxVectors
[
2
][
2
],
0
);
fvec4
invBoxSize
(
recipBoxSize
[
0
],
recipBoxSize
[
1
],
recipBoxSize
[
2
],
0
);
fvec4
invBoxSize
(
recipBoxSize
[
0
],
recipBoxSize
[
1
],
recipBoxSize
[
2
],
0
);
if
(
ewald
||
pme
)
{
if
(
ewald
||
pme
||
ljpme
)
{
// Compute the interactions from the neighbor list.
// Compute the interactions from the neighbor list.
while
(
true
)
{
while
(
true
)
{
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
int
nextBlock
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
if
(
nextBlock
>=
neighborList
->
getNumBlocks
())
...
@@ -370,7 +456,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
...
@@ -370,7 +456,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
break
;
break
;
int
end
=
min
(
start
+
groupSize
,
numberOfAtoms
);
int
end
=
min
(
start
+
groupSize
,
numberOfAtoms
);
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
fvec4
posI
((
float
)
atomCoordinates
[
i
][
0
],
(
float
)
atomCoordinates
[
i
][
1
],
(
float
)
atomCoordinates
[
i
][
2
],
0.0
f
);
fvec4
posI
((
float
)
atomCoordinates
[
i
][
0
],
(
float
)
atomCoordinates
[
i
][
1
],
(
float
)
atomCoordinates
[
i
][
2
],
0.0
f
);
float
scaledChargeI
=
(
float
)
(
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]);
float
scaledChargeI
=
(
float
)
(
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]);
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
if
(
*
iter
>
i
)
{
...
@@ -394,7 +480,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
...
@@ -394,7 +480,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
threadEnergy
[
threadIndex
]
-=
chargeProdOverR
*
erfAlphaR
;
threadEnergy
[
threadIndex
]
-=
chargeProdOverR
*
erfAlphaR
;
}
}
else
if
(
includeEnergy
)
else
if
(
includeEnergy
)
threadEnergy
[
threadIndex
]
-=
alphaEwald
*
TWO_OVER_SQRT_PI
*
scaledChargeI
*
posq
[
4
*
j
+
3
];
threadEnergy
[
threadIndex
]
-=
alphaEwald
*
TWO_OVER_SQRT_PI
*
scaledChargeI
*
posq
[
4
*
j
+
3
];
if
(
ljpme
)
{
float
C6ij
=
C6params
[
i
]
*
C6params
[
j
];
float
inverseR2
=
1.0
f
/
r2
;
float
emult
=
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
exptermsApprox
(
r
);
if
(
includeEnergy
)
threadEnergy
[
threadIndex
]
+=
emult
;
float
dEdR
=
-
6.0
f
*
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
inverseR2
*
dExptermsApprox
(
r
);
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
i
)
-
result
).
store
(
forces
+
4
*
i
);
(
fvec4
(
forces
+
4
*
j
)
+
result
).
store
(
forces
+
4
*
j
);
}
}
}
}
}
}
}
...
@@ -444,7 +541,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
...
@@ -444,7 +541,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
}
}
float
sig
=
atomParameters
[
ii
].
first
+
atomParameters
[
jj
].
first
;
float
sig
=
atomParameters
[
ii
].
first
+
atomParameters
[
jj
].
first
;
float
sig2
=
inverseR
*
sig
;
float
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
atomParameters
[
ii
].
second
*
atomParameters
[
jj
].
second
;
float
eps
=
atomParameters
[
ii
].
second
*
atomParameters
[
jj
].
second
;
...
@@ -476,7 +573,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
...
@@ -476,7 +573,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4
result
=
deltaR
*
dEdR
;
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
ii
)
+
result
).
store
(
forces
+
4
*
ii
);
(
fvec4
(
forces
+
4
*
ii
)
+
result
).
store
(
forces
+
4
*
ii
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
deltaR
=
posJ
-
posI
;
...
@@ -502,3 +599,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
...
@@ -502,3 +599,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return
coeff1
*
erfcTable
[
index
]
+
coeff2
*
erfcTable
[
index
+
1
];
return
coeff1
*
erfcTable
[
index
]
+
coeff2
*
erfcTable
[
index
+
1
];
}
}
float
CpuNonbondedForce
::
exptermsApprox
(
float
x
)
{
float
x1
=
x
*
exptermsDXInv
;
int
index
=
min
((
int
)
floor
(
x1
),
NUM_TABLE_POINTS
);
float
coeff2
=
x1
-
index
;
float
coeff1
=
1.0
f
-
coeff2
;
return
coeff1
*
exptermsTable
[
index
]
+
coeff2
*
exptermsTable
[
index
+
1
];
}
float
CpuNonbondedForce
::
dExptermsApprox
(
float
x
)
{
float
x1
=
x
*
exptermsDXInv
;
int
index
=
min
((
int
)
floor
(
x1
),
NUM_TABLE_POINTS
);
float
coeff2
=
x1
-
index
;
float
coeff1
=
1.0
f
-
coeff2
;
return
coeff1
*
dExptermsTable
[
index
]
+
coeff2
*
dExptermsTable
[
index
+
1
];
}
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
a9f65649
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
#include "CpuNonbondedForceVec4.h"
#include <algorithm>
#include <algorithm>
#include <iostream>
using
namespace
std
;
using
namespace
std
;
using
namespace
OpenMM
;
using
namespace
OpenMM
;
...
@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Determine whether we need to apply periodic boundary conditions.
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
PeriodicType
periodicType
;
fvec4
blockCenter
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
if
(
!
periodic
)
{
...
@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
...
@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template
<
int
PERIODIC_TYPE
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
...
@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
C6s
(
C6params
[
blockAtom
[
0
]],
C6params
[
blockAtom
[
1
]],
C6params
[
blockAtom
[
2
]],
C6params
[
blockAtom
[
3
]]);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
...
@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4
sig2
=
inverseR
*
sig
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
fvec4
eps
=
blockAtomEpsilon
*
atomEpsilon
;
fvec4
epsSig6
=
eps
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
...
@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
energy
*=
switchValue
;
}
}
if
(
ljpme
)
{
fvec4
C6ij
=
C6s
*
C6params
[
atom
];
fvec4
inverseR2
=
inverseR
*
inverseR
;
fvec4
mysig2
=
sig
*
sig
;
fvec4
mysig6
=
mysig2
*
mysig2
*
mysig2
;
fvec4
emult
=
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
exptermsApprox
(
r
);
fvec4
potentialShift
=
eps
*
(
1.0
f
-
mysig6
*
inverseRcut6
)
*
mysig6
*
inverseRcut6
-
C6ij
*
inverseRcut6Expterm
;
dEdR
+=
6.0
f
*
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
dExptermsApprox
(
r
);
energy
+=
emult
+
potentialShift
;
}
}
}
else
{
else
{
energy
=
0.0
f
;
energy
=
0.0
f
;
...
@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
}
}
// Record the forces on the block atoms.
// Record the forces on the block atoms.
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
...
@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose
(
t1
,
t2
,
t3
,
t4
);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
return
coeff1
*
t1
+
coeff2
*
t2
;
}
}
fvec4
CpuNonbondedForceVec4
::
exptermsApprox
(
const
fvec4
&
r
)
{
fvec4
r1
=
r
*
exptermsDXInv
;
ivec4
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
r1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
exptermsTable
[
index
[
0
]]);
fvec4
t2
(
&
exptermsTable
[
index
[
1
]]);
fvec4
t3
(
&
exptermsTable
[
index
[
2
]]);
fvec4
t4
(
&
exptermsTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
fvec4
CpuNonbondedForceVec4
::
dExptermsApprox
(
const
fvec4
&
r
)
{
fvec4
r1
=
r
*
exptermsDXInv
;
ivec4
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec4
coeff2
=
r1
-
index
;
fvec4
coeff1
=
1.0
f
-
coeff2
;
fvec4
t1
(
&
dExptermsTable
[
index
[
0
]]);
fvec4
t2
(
&
dExptermsTable
[
index
[
1
]]);
fvec4
t3
(
&
dExptermsTable
[
index
[
2
]]);
fvec4
t4
(
&
dExptermsTable
[
index
[
3
]]);
transpose
(
t1
,
t2
,
t3
,
t4
);
return
coeff1
*
t1
+
coeff2
*
t2
;
}
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
a9f65649
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <algorithm>
#include <iostream>
using
namespace
std
;
using
namespace
std
;
using
namespace
OpenMM
;
using
namespace
OpenMM
;
...
@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
...
@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Determine whether we need to apply periodic boundary conditions.
// Determine whether we need to apply periodic boundary conditions.
PeriodicType
periodicType
;
PeriodicType
periodicType
;
fvec4
blockCenter
;
fvec4
blockCenter
;
if
(
!
periodic
)
{
if
(
!
periodic
)
{
...
@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge
*=
ONE_4PI_EPS0
;
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
C6s
(
C6params
[
blockAtom
[
0
]],
C6params
[
blockAtom
[
1
]],
C6params
[
blockAtom
[
2
]],
C6params
[
blockAtom
[
3
]],
C6params
[
blockAtom
[
4
]],
C6params
[
blockAtom
[
5
]],
C6params
[
blockAtom
[
6
]],
C6params
[
blockAtom
[
7
]]);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
...
@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8
sig2
=
inverseR
*
sig
;
fvec8
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
sig6
=
sig2
*
sig2
*
sig2
;
fvec8
epsSig6
=
blockAtomEpsilon
*
atomEpsilon
*
sig6
;
fvec8
eps
=
blockAtomEpsilon
*
atomEpsilon
;
fvec8
epsSig6
=
eps
*
sig6
;
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
...
@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
dEdR
=
switchValue
*
dEdR
-
energy
*
switchDeriv
*
r
;
energy
*=
switchValue
;
energy
*=
switchValue
;
}
}
if
(
ljpme
)
{
fvec8
C6ij
=
C6s
*
C6params
[
atom
];
fvec8
inverseR2
=
inverseR
*
inverseR
;
fvec8
mysig2
=
sig
*
sig
;
fvec8
mysig6
=
mysig2
*
mysig2
*
mysig2
;
fvec8
emult
=
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
exptermsApprox
(
r
);
fvec8
potentialShift
=
eps
*
(
1.0
f
-
mysig6
*
inverseRcut6
)
*
mysig6
*
inverseRcut6
-
C6ij
*
inverseRcut6Expterm
;
dEdR
+=
6.0
f
*
C6ij
*
inverseR2
*
inverseR2
*
inverseR2
*
dExptermsApprox
(
r
);
energy
+=
emult
+
potentialShift
;
}
}
}
else
{
else
{
energy
=
0.0
f
;
energy
=
0.0
f
;
...
@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
...
@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
return
coeff1
*
s1
+
coeff2
*
s2
;
}
}
fvec8
CpuNonbondedForceVec8
::
exptermsApprox
(
const
fvec8
&
r
)
{
fvec8
r1
=
r
*
exptermsDXInv
;
ivec8
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec8
coeff2
=
r1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
ivec4
indexLower
=
index
.
lowerVec
();
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
exptermsTable
[
indexLower
[
0
]]);
fvec4
t2
(
&
exptermsTable
[
indexLower
[
1
]]);
fvec4
t3
(
&
exptermsTable
[
indexLower
[
2
]]);
fvec4
t4
(
&
exptermsTable
[
indexLower
[
3
]]);
fvec4
t5
(
&
exptermsTable
[
indexUpper
[
0
]]);
fvec4
t6
(
&
exptermsTable
[
indexUpper
[
1
]]);
fvec4
t7
(
&
exptermsTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
exptermsTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
fvec8
CpuNonbondedForceVec8
::
dExptermsApprox
(
const
fvec8
&
r
)
{
fvec8
r1
=
r
*
exptermsDXInv
;
ivec8
index
=
min
(
floor
(
r1
),
NUM_TABLE_POINTS
);
fvec8
coeff2
=
r1
-
index
;
fvec8
coeff1
=
1.0
f
-
coeff2
;
ivec4
indexLower
=
index
.
lowerVec
();
ivec4
indexUpper
=
index
.
upperVec
();
fvec4
t1
(
&
dExptermsTable
[
indexLower
[
0
]]);
fvec4
t2
(
&
dExptermsTable
[
indexLower
[
1
]]);
fvec4
t3
(
&
dExptermsTable
[
indexLower
[
2
]]);
fvec4
t4
(
&
dExptermsTable
[
indexLower
[
3
]]);
fvec4
t5
(
&
dExptermsTable
[
indexUpper
[
0
]]);
fvec4
t6
(
&
dExptermsTable
[
indexUpper
[
1
]]);
fvec4
t7
(
&
dExptermsTable
[
indexUpper
[
2
]]);
fvec4
t8
(
&
dExptermsTable
[
indexUpper
[
3
]]);
fvec8
s1
,
s2
,
s3
,
s4
;
transpose
(
t1
,
t2
,
t3
,
t4
,
t5
,
t6
,
t7
,
t8
,
s1
,
s2
,
s3
,
s4
);
return
coeff1
*
s1
+
coeff2
*
s2
;
}
#endif
#endif
platforms/cuda/include/CudaKernels.h
View file @
a9f65649
...
@@ -598,8 +598,10 @@ private:
...
@@ -598,8 +598,10 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
C6s
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
directDispersionPmeGrid
(
NULL
),
reciprocalDispersionPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeAtomDispersionGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
dispersionPmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
),
dispersionPmeio
(
NULL
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -636,6 +638,15 @@ public:
...
@@ -636,6 +638,15 @@ public:
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
private:
class
SortTrait
:
public
CudaSort
::
SortTrait
{
class
SortTrait
:
public
CudaSort
::
SortTrait
{
int
getDataSize
()
const
{
return
8
;}
int
getDataSize
()
const
{
return
8
;}
...
@@ -655,38 +666,55 @@ private:
...
@@ -655,38 +666,55 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
bool
hasInitializedFFT
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
sigmaEpsilon
;
CudaArray
*
C6s
;
CudaArray
*
exceptionParams
;
CudaArray
*
exceptionParams
;
CudaArray
*
cosSinSums
;
CudaArray
*
cosSinSums
;
CudaArray
*
directPmeGrid
;
CudaArray
*
directPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
*
directDispersionPmeGrid
;
CudaArray
*
reciprocalDispersionPmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomDispersionGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
*
dispersionPmeEnergyBuffer
;
CudaSort
*
sort
;
CudaSort
*
sort
;
Kernel
cpuPme
;
Kernel
cpuPme
;
Kernel
cpuDispersionPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
CUstream
pmeStream
;
PmeIO
*
dispersionPmeio
;
CUevent
pmeSyncEvent
;
CUstream
pmeStream
,
dispersionPmeStream
;
CUevent
pmeSyncEvent
,
dispersionPmeSyncEvent
;
CudaFFT3D
*
fft
;
CudaFFT3D
*
fft
;
cufftHandle
fftForward
;
cufftHandle
fftForward
;
cufftHandle
fftBackward
;
cufftHandle
fftBackward
;
CudaFFT3D
*
dispersionFft
;
cufftHandle
dispersionFftForward
;
cufftHandle
dispersionFftBackward
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldSumsKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
ewaldForcesKernel
;
CUfunction
pmeGridIndexKernel
;
CUfunction
pmeGridIndexKernel
;
CUfunction
pmeDispersionGridIndexKernel
;
CUfunction
pmeSpreadChargeKernel
;
CUfunction
pmeSpreadChargeKernel
;
CUfunction
pmeDispersionSpreadChargeKernel
;
CUfunction
pmeFinishSpreadChargeKernel
;
CUfunction
pmeFinishSpreadChargeKernel
;
CUfunction
pmeDispersionFinishSpreadChargeKernel
;
CUfunction
pmeEvalEnergyKernel
;
CUfunction
pmeEvalEnergyKernel
;
CUfunction
pmeEvalDispersionEnergyKernel
;
CUfunction
pmeConvolutionKernel
;
CUfunction
pmeConvolutionKernel
;
CUfunction
pmeDispersionConvolutionKernel
;
CUfunction
pmeInterpolateForceKernel
;
CUfunction
pmeInterpolateForceKernel
;
CUfunction
pmeInterpolateDispersionForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersion
Coefficient
,
a
lpha
;
double
ewaldSelfEnergy
,
dispersion
SelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionA
lpha
;
int
interpolateForceThreads
;
int
interpolateForceThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeStream
,
useCudaFFT
,
doLJPME
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
...
platforms/cuda/include/CudaParallelKernels.h
View file @
a9f65649
...
@@ -439,6 +439,15 @@ public:
...
@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis
* @param nz the number of grid points along the Z axis
*/
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
private:
class
Task
;
class
Task
;
CudaPlatform
::
PlatformData
&
data
;
CudaPlatform
::
PlatformData
&
data
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
a9f65649
...
@@ -1593,14 +1593,20 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1593,14 +1593,20 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
if
(
sigmaEpsilon
!=
NULL
)
if
(
sigmaEpsilon
!=
NULL
)
delete
sigmaEpsilon
;
delete
sigmaEpsilon
;
if
(
C6s
!=
NULL
)
delete
C6s
;
if
(
exceptionParams
!=
NULL
)
if
(
exceptionParams
!=
NULL
)
delete
exceptionParams
;
delete
exceptionParams
;
if
(
cosSinSums
!=
NULL
)
if
(
cosSinSums
!=
NULL
)
delete
cosSinSums
;
delete
cosSinSums
;
if
(
directPmeGrid
!=
NULL
)
if
(
directPmeGrid
!=
NULL
)
delete
directPmeGrid
;
delete
directPmeGrid
;
if
(
directDispersionPmeGrid
!=
NULL
)
delete
directDispersionPmeGrid
;
if
(
reciprocalPmeGrid
!=
NULL
)
if
(
reciprocalPmeGrid
!=
NULL
)
delete
reciprocalPmeGrid
;
delete
reciprocalPmeGrid
;
if
(
reciprocalDispersionPmeGrid
!=
NULL
)
delete
reciprocalDispersionPmeGrid
;
if
(
pmeBsplineModuliX
!=
NULL
)
if
(
pmeBsplineModuliX
!=
NULL
)
delete
pmeBsplineModuliX
;
delete
pmeBsplineModuliX
;
if
(
pmeBsplineModuliY
!=
NULL
)
if
(
pmeBsplineModuliY
!=
NULL
)
...
@@ -1611,14 +1617,20 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1611,14 +1617,20 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete
pmeAtomRange
;
delete
pmeAtomRange
;
if
(
pmeAtomGridIndex
!=
NULL
)
if
(
pmeAtomGridIndex
!=
NULL
)
delete
pmeAtomGridIndex
;
delete
pmeAtomGridIndex
;
if
(
pmeAtomDispersionGridIndex
!=
NULL
)
delete
pmeAtomDispersionGridIndex
;
if
(
pmeEnergyBuffer
!=
NULL
)
if
(
pmeEnergyBuffer
!=
NULL
)
delete
pmeEnergyBuffer
;
delete
pmeEnergyBuffer
;
if
(
dispersionPmeEnergyBuffer
!=
NULL
)
delete
dispersionPmeEnergyBuffer
;
if
(
sort
!=
NULL
)
if
(
sort
!=
NULL
)
delete
sort
;
delete
sort
;
if
(
fft
!=
NULL
)
if
(
fft
!=
NULL
)
delete
fft
;
delete
fft
;
if
(
pmeio
!=
NULL
)
if
(
pmeio
!=
NULL
)
delete
pmeio
;
delete
pmeio
;
if
(
dispersionPmeio
!=
NULL
)
delete
dispersionPmeio
;
if
(
hasInitializedFFT
)
{
if
(
hasInitializedFFT
)
{
if
(
useCudaFFT
)
{
if
(
useCudaFFT
)
{
cufftDestroy
(
fftForward
);
cufftDestroy
(
fftForward
);
...
@@ -1627,6 +1639,10 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1627,6 +1639,10 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
if
(
usePmeStream
)
{
if
(
usePmeStream
)
{
cuStreamDestroy
(
pmeStream
);
cuStreamDestroy
(
pmeStream
);
cuEventDestroy
(
pmeSyncEvent
);
cuEventDestroy
(
pmeSyncEvent
);
if
(
doLJPME
){
cuStreamDestroy
(
dispersionPmeStream
);
cuEventDestroy
(
dispersionPmeSyncEvent
);
}
}
}
}
}
}
}
...
@@ -1634,6 +1650,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1634,6 +1650,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
void
CudaCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
void
CudaCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
nonbondedMethod
=
CalcNonbondedForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
// Identify which exceptions are 1-4 interactions.
// Identify which exceptions are 1-4 interactions.
vector
<
pair
<
int
,
int
>
>
exclusions
;
vector
<
pair
<
int
,
int
>
>
exclusions
;
...
@@ -1650,14 +1668,29 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1650,14 +1668,29 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
// Initialize nonbonded interactions.
int
numParticles
=
force
.
getNumParticles
();
int
numParticles
=
force
.
getNumParticles
();
// Pack the C6 coeffiecient in with sigma and epsilon, in case LJPME is being used. The C6
// coefficients could live in a separate array, but that would hurt cache efficiency for LJPME.
doLJPME
=
nonbondedMethod
==
LJPME
;
sigmaEpsilon
=
CudaArray
::
create
<
float2
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"sigmaEpsilon"
);
sigmaEpsilon
=
CudaArray
::
create
<
float2
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"sigmaEpsilon"
);
if
(
doLJPME
){
if
(
cu
.
getUseDoublePrecision
())
C6s
=
CudaArray
::
create
<
double
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"C6s"
);
else
C6s
=
CudaArray
::
create
<
float
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"C6s"
);
}
CudaArray
&
posq
=
cu
.
getPosq
();
CudaArray
&
posq
=
cu
.
getPosq
();
vector
<
double4
>
temp
(
posq
.
getSize
());
vector
<
double4
>
temp
(
posq
.
getSize
());
float4
*
posqf
=
(
float4
*
)
&
temp
[
0
];
float4
*
posqf
=
(
float4
*
)
&
temp
[
0
];
double4
*
posqd
=
(
double4
*
)
&
temp
[
0
];
double4
*
posqd
=
(
double4
*
)
&
temp
[
0
];
// The C6 coefficients for LJPME could be computed from sigma and epsilon, but it
// seems like a good idea to cache them and avoid many recomputations.
vector
<
double
>
tmpc6
(
posq
.
getSize
());
float
*
c6f
=
(
float
*
)
&
tmpc6
[
0
];
double
*
c6d
=
(
double
*
)
&
tmpc6
[
0
];
vector
<
float2
>
sigmaEpsilonVector
(
cu
.
getPaddedNumAtoms
(),
make_float2
(
0
,
0
));
vector
<
float2
>
sigmaEpsilonVector
(
cu
.
getPaddedNumAtoms
(),
make_float2
(
0
,
0
));
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
double
sumSquaredCharges
=
0.0
;
double
sumSquaredCharges
=
0.0
;
double
sumSquaredC6
=
0.0
;
hasCoulomb
=
false
;
hasCoulomb
=
false
;
hasLJ
=
false
;
hasLJ
=
false
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
...
@@ -1667,7 +1700,17 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1667,7 +1700,17 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
posqd
[
i
]
=
make_double4
(
0
,
0
,
0
,
charge
);
posqd
[
i
]
=
make_double4
(
0
,
0
,
0
,
charge
);
else
else
posqf
[
i
]
=
make_float4
(
0
,
0
,
0
,
(
float
)
charge
);
posqf
[
i
]
=
make_float4
(
0
,
0
,
0
,
(
float
)
charge
);
sigmaEpsilonVector
[
i
]
=
make_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
double
sig
=
(
float
)
(
0.5
*
sigma
);
double
eps
=
(
float
)
(
2.0
*
sqrt
(
epsilon
));
sigmaEpsilonVector
[
i
]
=
make_float2
(
sig
,
eps
);
if
(
doLJPME
){
float
C6
=
(
float
)
8.0
*
pow
(
sig
,
3
)
*
eps
;
sumSquaredC6
+=
C6
*
C6
;
if
(
cu
.
getUseDoublePrecision
())
c6d
[
i
]
=
8.0
*
pow
(
sig
,
3
)
*
eps
;
else
c6f
[
i
]
=
8.0
f
*
pow
((
float
)
sig
,
3
)
*
eps
;
}
exclusionList
[
i
].
push_back
(
i
);
exclusionList
[
i
].
push_back
(
i
);
sumSquaredCharges
+=
charge
*
charge
;
sumSquaredCharges
+=
charge
*
charge
;
if
(
charge
!=
0.0
)
if
(
charge
!=
0.0
)
...
@@ -1681,7 +1724,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1681,7 +1724,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
posq
.
upload
(
&
temp
[
0
]);
posq
.
upload
(
&
temp
[
0
]);
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
nonbondedMethod
=
CalcNonbondedForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
if
(
doLJPME
)
C6s
->
upload
(
&
tmpc6
[
0
]);
bool
useCutoff
=
(
nonbondedMethod
!=
NoCutoff
);
bool
useCutoff
=
(
nonbondedMethod
!=
NoCutoff
);
bool
usePeriodic
=
(
nonbondedMethod
!=
NoCutoff
&&
nonbondedMethod
!=
CutoffNonPeriodic
);
bool
usePeriodic
=
(
nonbondedMethod
!=
NoCutoff
&&
nonbondedMethod
!=
CutoffNonPeriodic
);
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
...
@@ -1705,12 +1749,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1705,12 +1749,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines
[
"LJ_SWITCH_C5"
]
=
cu
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
(),
5.0
));
defines
[
"LJ_SWITCH_C5"
]
=
cu
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
(),
5.0
));
}
}
}
}
if
(
force
.
getUseDispersionCorrection
()
&&
cu
.
getContextIndex
()
==
0
)
if
(
force
.
getUseDispersionCorrection
()
&&
cu
.
getContextIndex
()
==
0
&&
!
doLJPME
)
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
else
else
dispersionCoefficient
=
0.0
;
dispersionCoefficient
=
0.0
;
alpha
=
0
;
alpha
=
0
;
ewaldSelfEnergy
=
0.0
;
ewaldSelfEnergy
=
0.0
;
dispersionSelfEnergy
=
0.0
;
if
(
nonbondedMethod
==
Ewald
)
{
if
(
nonbondedMethod
==
Ewald
)
{
// Compute the Ewald parameters.
// Compute the Ewald parameters.
...
@@ -1740,19 +1785,39 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1740,19 +1785,39 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cosSinSums
=
new
CudaArray
(
cu
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
cosSinSums
=
new
CudaArray
(
cu
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
}
}
}
else
if
(
nonbondedMethod
==
PME
)
{
else
if
(
nonbondedMethod
==
PME
||
nonbondedMethod
==
LJPME
)
{
// Compute the PME parameters.
// Compute the PME parameters.
//
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
false
);
gridSizeX
=
CudaFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeX
=
CudaFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeY
=
CudaFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeY
=
CudaFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeZ
=
CudaFFT3D
::
findLegalDimension
(
gridSizeZ
);
gridSizeZ
=
CudaFFT3D
::
findLegalDimension
(
gridSizeZ
);
if
(
doLJPME
){
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
dispersionAlpha
,
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
,
true
);
dispersionGridSizeX
=
CudaFFT3D
::
findLegalDimension
(
dispersionGridSizeX
);
dispersionGridSizeY
=
CudaFFT3D
::
findLegalDimension
(
dispersionGridSizeY
);
dispersionGridSizeZ
=
CudaFFT3D
::
findLegalDimension
(
dispersionGridSizeZ
);
}
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
defines
[
"EWALD_ALPHA"
]
=
cu
.
doubleToString
(
alpha
);
if
(
doLJPME
)
{
defines
[
"EWALD_DISPERSION_ALPHA"
]
=
cu
.
doubleToString
(
dispersionAlpha
);
double
invRCut6
=
pow
(
force
.
getCutoffDistance
(),
-
6
);
double
dalphaR
=
dispersionAlpha
*
force
.
getCutoffDistance
();
double
dar2
=
dalphaR
*
dalphaR
;
double
dar4
=
dar2
*
dar2
;
double
multShift6
=
-
invRCut6
*
(
1.0
-
exp
(
-
dar2
)
*
(
1.0
+
dar2
+
0.5
*
dar4
));
defines
[
"INVCUT6"
]
=
cu
.
doubleToString
(
invRCut6
);
defines
[
"MULTSHIFT6"
]
=
cu
.
doubleToString
(
multShift6
);
}
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"TWO_OVER_SQRT_PI"
]
=
cu
.
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"USE_EWALD"
]
=
"1"
;
defines
[
"DO_LJPME"
]
=
doLJPME
?
"1"
:
"0"
;
if
(
cu
.
getContextIndex
()
==
0
)
{
if
(
cu
.
getContextIndex
()
==
0
)
{
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
);
if
(
doLJPME
)
dispersionSelfEnergy
=
pow
(
dispersionAlpha
,
6
)
*
sumSquaredC6
/
12.0
;
char
deviceName
[
100
];
char
deviceName
[
100
];
cuDeviceGetName
(
deviceName
,
100
,
cu
.
getDevice
());
cuDeviceGetName
(
deviceName
,
100
,
cu
.
getDevice
());
usePmeStream
=
(
!
cu
.
getPlatformData
().
disablePmeStream
&&
string
(
deviceName
)
!=
"GeForce GTX 980"
);
// Using a separate stream is slower on GTX 980
usePmeStream
=
(
!
cu
.
getPlatformData
().
disablePmeStream
&&
string
(
deviceName
)
!=
"GeForce GTX 980"
);
// Using a separate stream is slower on GTX 980
...
@@ -1771,7 +1836,19 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1771,7 +1836,19 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines
[
"USE_PME_STREAM"
]
=
"1"
;
pmeDefines
[
"USE_PME_STREAM"
]
=
"1"
;
if
(
cu
.
getPlatformData
().
deterministicForces
)
if
(
cu
.
getPlatformData
().
deterministicForces
)
pmeDefines
[
"USE_DETERMINISTIC_FORCES"
]
=
"1"
;
pmeDefines
[
"USE_DETERMINISTIC_FORCES"
]
=
"1"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
if
(
doLJPME
){
pmeDefines
[
"EWALD_DISPERSION_ALPHA"
]
=
cu
.
doubleToString
(
dispersionAlpha
);
pmeDefines
[
"DISPERSION_GRID_SIZE_X"
]
=
cu
.
intToString
(
dispersionGridSizeX
);
pmeDefines
[
"DISPERSION_GRID_SIZE_Y"
]
=
cu
.
intToString
(
dispersionGridSizeY
);
pmeDefines
[
"DISPERSION_GRID_SIZE_Z"
]
=
cu
.
intToString
(
dispersionGridSizeZ
);
pmeDefines
[
"RECIP_DISPERSION_EXP_FACTOR"
]
=
cu
.
doubleToString
(
M_PI
*
M_PI
/
(
dispersionAlpha
*
dispersionAlpha
));
}
CUmodule
module
;
if
(
doLJPME
){
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
+
CudaKernelSources
::
ljpme
,
pmeDefines
);
}
else
{
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
}
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
// Create the CPU PME kernel.
...
@@ -1782,6 +1859,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1782,6 +1859,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeio
=
new
PmeIO
(
cu
,
addForcesKernel
);
pmeio
=
new
PmeIO
(
cu
,
addForcesKernel
);
cu
.
addPreComputation
(
new
PmePreComputation
(
cu
,
cpuPme
,
*
pmeio
));
cu
.
addPreComputation
(
new
PmePreComputation
(
cu
,
cpuPme
,
*
pmeio
));
cu
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
cu
.
addPostComputation
(
new
PmePostComputation
(
cpuPme
,
*
pmeio
));
if
(
doLJPME
){
cpuDispersionPme
=
getPlatform
().
createKernel
(
CalcDispersionPmeReciprocalForceKernel
::
Name
(),
*
cu
.
getPlatformData
().
context
);
cpuDispersionPme
.
getAs
<
CalcDispersionPmeReciprocalForceKernel
>
().
initialize
(
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
,
numParticles
,
dispersionAlpha
);
cu
.
addPreComputation
(
new
PmePreComputation
(
cu
,
cpuDispersionPme
,
*
pmeio
));
cu
.
addPostComputation
(
new
PmePostComputation
(
cpuDispersionPme
,
*
pmeio
));
}
}
}
catch
(
OpenMMException
&
ex
)
{
catch
(
OpenMMException
&
ex
)
{
// The CPU PME plugin isn't available.
// The CPU PME plugin isn't available.
...
@@ -1796,6 +1880,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1796,6 +1880,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
cuFuncSetCacheConfig
(
pmeSpreadChargeKernel
,
CU_FUNC_CACHE_PREFER_L1
);
cuFuncSetCacheConfig
(
pmeSpreadChargeKernel
,
CU_FUNC_CACHE_PREFER_L1
);
cuFuncSetCacheConfig
(
pmeInterpolateForceKernel
,
CU_FUNC_CACHE_PREFER_L1
);
cuFuncSetCacheConfig
(
pmeInterpolateForceKernel
,
CU_FUNC_CACHE_PREFER_L1
);
if
(
doLJPME
){
pmeDispersionFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadC6"
);
pmeDispersionGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomDispersionGridIndex"
);
pmeDispersionSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"gridSpreadC6"
);
pmeDispersionConvolutionKernel
=
cu
.
getKernel
(
module
,
"reciprocalDispersionConvolution"
);
pmeEvalDispersionEnergyKernel
=
cu
.
getKernel
(
module
,
"gridEvaluateDispersionEnergy"
);
pmeInterpolateDispersionForceKernel
=
cu
.
getKernel
(
module
,
"gridInterpolateDispersionForce"
);
cuFuncSetCacheConfig
(
pmeDispersionSpreadChargeKernel
,
CU_FUNC_CACHE_PREFER_L1
);
}
// Create required data structures.
// Create required data structures.
...
@@ -1803,14 +1896,28 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1803,14 +1896,28 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
2
*
elementSize
:
2
*
sizeof
(
long
long
),
"originalPmeGrid"
);
directPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
2
*
elementSize
:
2
*
sizeof
(
long
long
),
"originalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"reciprocalPmeGrid"
);
reciprocalPmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"reciprocalPmeGrid"
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
cu
.
addAutoclearBuffer
(
*
directPmeGrid
);
if
(
doLJPME
){
directDispersionPmeGrid
=
new
CudaArray
(
cu
,
dispersionGridSizeX
*
dispersionGridSizeY
*
dispersionGridSizeZ
,
cu
.
getComputeCapability
()
>=
2.0
?
2
*
elementSize
:
2
*
sizeof
(
long
long
),
"originalDispersionPmeGrid"
);
cu
.
addAutoclearBuffer
(
*
directDispersionPmeGrid
);
reciprocalDispersionPmeGrid
=
new
CudaArray
(
cu
,
dispersionGridSizeX
*
dispersionGridSizeY
*
dispersionGridSizeZ
,
2
*
elementSize
,
"reciprocalDispersionPmeGrid"
);
}
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
if
(
doLJPME
)
pmeAtomDispersionGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomDispersionGridIndex"
);
int
energyElementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
energyElementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeEnergyBuffer
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
energyElementSize
,
"pmeEnergyBuffer"
);
pmeEnergyBuffer
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
energyElementSize
,
"pmeEnergyBuffer"
);
cu
.
clearBuffer
(
*
pmeEnergyBuffer
);
cu
.
clearBuffer
(
*
pmeEnergyBuffer
);
if
(
doLJPME
){
dispersionPmeEnergyBuffer
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
energyElementSize
,
"dispersionPmeEnergyBuffer"
);
cu
.
clearBuffer
(
*
dispersionPmeEnergyBuffer
);
}
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
int
cufftVersion
;
int
cufftVersion
;
cufftGetVersion
(
&
cufftVersion
);
cufftGetVersion
(
&
cufftVersion
);
...
@@ -1822,12 +1929,24 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1822,12 +1929,24 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
result
=
cufftPlan3d
(
&
fftBackward
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
if
(
result
!=
CUFFT_SUCCESS
)
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
if
(
doLJPME
){
result
=
cufftPlan3d
(
&
dispersionFftForward
,
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_D2Z
:
CUFFT_R2C
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing disperison FFT: "
+
cu
.
intToString
(
result
));
result
=
cufftPlan3d
(
&
dispersionFftBackward
,
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2D
:
CUFFT_C2R
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing disperison FFT: "
+
cu
.
intToString
(
result
));
}
}
}
else
else
{
fft
=
new
CudaFFT3D
(
cu
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
true
);
fft
=
new
CudaFFT3D
(
cu
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
true
);
dispersionFft
=
new
CudaFFT3D
(
cu
,
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
,
true
);
}
// Prepare for doing PME on its own stream.
// Prepare for doing PME on its own stream.
if
(
usePmeStream
)
{
if
(
usePmeStream
)
{
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
if
(
useCudaFFT
)
{
if
(
useCudaFFT
)
{
...
@@ -1840,9 +1959,21 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1840,9 +1959,21 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
recipForceGroup
=
force
.
getForceGroup
();
recipForceGroup
=
force
.
getForceGroup
();
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
cu
,
pmeStream
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
cu
,
pmeStream
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
cu
,
pmeSyncEvent
,
cu
.
getKernel
(
module
,
"addEnergy"
),
*
pmeEnergyBuffer
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
cu
,
pmeSyncEvent
,
cu
.
getKernel
(
module
,
"addEnergy"
),
*
pmeEnergyBuffer
,
recipForceGroup
));
if
(
doLJPME
){
cuStreamCreate
(
&
dispersionPmeStream
,
CU_STREAM_NON_BLOCKING
);
// Dispersion terms use yet another stream.
if
(
useCudaFFT
)
{
cufftSetStream
(
dispersionFftForward
,
dispersionPmeStream
);
cufftSetStream
(
dispersionFftBackward
,
dispersionPmeStream
);
}
CHECK_RESULT
(
cuEventCreate
(
&
dispersionPmeSyncEvent
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for Dispersion term of NonbondedForce"
);
// The force group is the same as the electrostatic PME reciprocal force group.
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
cu
,
dispersionPmeStream
,
dispersionPmeSyncEvent
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
cu
,
dispersionPmeSyncEvent
,
cu
.
getKernel
(
module
,
"addEnergy"
),
*
dispersionPmeEnergyBuffer
,
recipForceGroup
));
}
}
}
hasInitializedFFT
=
true
;
hasInitializedFFT
=
true
;
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
...
@@ -1916,13 +2047,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1916,13 +2047,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
}
}
}
}
// Add the interaction to the default nonbonded kernel.
// Add the interaction to the default nonbonded kernel.
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
coulombLennardJones
,
defines
);
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
coulombLennardJones
,
defines
);
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
(),
true
);
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
(),
true
);
if
(
hasLJ
)
if
(
hasLJ
){
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"sigmaEpsilon"
,
"float"
,
2
,
sizeof
(
float2
),
sigmaEpsilon
->
getDevicePointer
()));
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"sigmaEpsilon"
,
"float"
,
2
,
sizeof
(
float2
),
sigmaEpsilon
->
getDevicePointer
()));
if
(
doLJPME
){
if
(
cu
.
getUseDoublePrecision
())
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"C6s"
,
"double"
,
1
,
sizeof
(
double
),
C6s
->
getDevicePointer
()));
else
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"C6s"
,
"float"
,
1
,
sizeof
(
float
),
C6s
->
getDevicePointer
()));
}
}
// Initialize the exceptions.
// Initialize the exceptions.
...
@@ -1959,9 +2099,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1959,9 +2099,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
directPmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
usePmeStream
)
if
(
usePmeStream
)
cu
.
setCurrentStream
(
pmeStream
);
cu
.
setCurrentStream
(
pmeStream
);
// Invert the periodic box vectors.
// Invert the periodic box vectors.
Vec3
boxVectors
[
3
];
Vec3
boxVectors
[
3
];
cu
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
cu
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
double
determinant
=
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
];
double
determinant
=
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
];
...
@@ -1985,7 +2125,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1985,7 +2125,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
recipBoxVectorPointer
[
1
]
=
&
recipBoxVectorsFloat
[
1
];
recipBoxVectorPointer
[
1
]
=
&
recipBoxVectorsFloat
[
1
];
recipBoxVectorPointer
[
2
]
=
&
recipBoxVectorsFloat
[
2
];
recipBoxVectorPointer
[
2
]
=
&
recipBoxVectorsFloat
[
2
];
}
}
// Execute the reciprocal space kernels.
// Execute the reciprocal space kernels.
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
...
@@ -2045,8 +2185,80 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2045,8 +2185,80 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
cu
.
restoreDefaultStream
();
cu
.
restoreDefaultStream
();
}
}
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
if
(
doLJPME
){
if
(
usePmeStream
)
cu
.
setCurrentStream
(
dispersionPmeStream
);
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomDispersionGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeDispersionGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
sort
->
sort
(
*
pmeAtomDispersionGridIndex
);
void
*
spreadArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
directDispersionPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomDispersionGridIndex
->
getDevicePointer
(),
&
C6s
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeDispersionSpreadChargeKernel
,
spreadArgs
,
cu
.
getNumAtoms
(),
128
);
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getComputeCapability
()
<
2.0
||
cu
.
getPlatformData
().
deterministicForces
)
{
void
*
finishSpreadArgs
[]
=
{
&
directDispersionPmeGrid
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeDispersionFinishSpreadChargeKernel
,
finishSpreadArgs
,
directDispersionPmeGrid
->
getSize
(),
256
);
}
if
(
useCudaFFT
)
{
if
(
cu
.
getUseDoublePrecision
())
cufftExecD2Z
(
dispersionFftForward
,
(
double
*
)
directDispersionPmeGrid
->
getDevicePointer
(),
(
double2
*
)
reciprocalDispersionPmeGrid
->
getDevicePointer
());
else
cufftExecR2C
(
dispersionFftForward
,
(
float
*
)
directDispersionPmeGrid
->
getDevicePointer
(),
(
float2
*
)
reciprocalDispersionPmeGrid
->
getDevicePointer
());
}
else
{
fft
->
execFFT
(
*
directDispersionPmeGrid
,
*
reciprocalDispersionPmeGrid
,
true
);
}
if
(
includeEnergy
)
{
void
*
computeEnergyArgs
[]
=
{
&
reciprocalDispersionPmeGrid
->
getDevicePointer
(),
usePmeStream
?
&
dispersionPmeEnergyBuffer
->
getDevicePointer
()
:
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
C6s
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeEvalDispersionEnergyKernel
,
computeEnergyArgs
,
dispersionGridSizeX
*
dispersionGridSizeY
*
dispersionGridSizeZ
);
}
void
*
convolutionArgs
[]
=
{
&
reciprocalDispersionPmeGrid
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeDispersionConvolutionKernel
,
convolutionArgs
,
dispersionGridSizeX
*
dispersionGridSizeY
*
dispersionGridSizeZ
,
256
);
if
(
useCudaFFT
)
{
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2D
(
dispersionFftBackward
,
(
double2
*
)
reciprocalDispersionPmeGrid
->
getDevicePointer
(),
(
double
*
)
directDispersionPmeGrid
->
getDevicePointer
());
else
cufftExecC2R
(
dispersionFftBackward
,
(
float2
*
)
reciprocalDispersionPmeGrid
->
getDevicePointer
(),
(
float
*
)
directDispersionPmeGrid
->
getDevicePointer
());
}
else
{
fft
->
execFFT
(
*
reciprocalDispersionPmeGrid
,
*
directDispersionPmeGrid
,
false
);
}
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directDispersionPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomDispersionGridIndex
->
getDevicePointer
(),
&
C6s
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInterpolateDispersionForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
if
(
usePmeStream
)
{
cuEventRecord
(
dispersionPmeSyncEvent
,
dispersionPmeStream
);
cu
.
restoreDefaultStream
();
}
}
}
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
includeReciprocal
)
energy
+=
dispersionSelfEnergy
;
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
energy
+=
dispersionCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
energy
+=
dispersionCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
...
@@ -2092,7 +2304,9 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2092,7 +2304,9 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
float4
*
posqf
=
(
float4
*
)
cu
.
getPinnedBuffer
();
float4
*
posqf
=
(
float4
*
)
cu
.
getPinnedBuffer
();
double4
*
posqd
=
(
double4
*
)
cu
.
getPinnedBuffer
();
double4
*
posqd
=
(
double4
*
)
cu
.
getPinnedBuffer
();
vector
<
float2
>
sigmaEpsilonVector
(
cu
.
getPaddedNumAtoms
(),
make_float2
(
0
,
0
));
vector
<
float2
>
sigmaEpsilonVector
(
cu
.
getPaddedNumAtoms
(),
make_float2
(
0
,
0
));
vector
<
float3
>
sigmaEpsilonC6Vector
(
cu
.
getPaddedNumAtoms
(),
make_float3
(
0
,
0
,
0
));
double
sumSquaredCharges
=
0.0
;
double
sumSquaredCharges
=
0.0
;
double
sumSquaredC6
=
0.0
;
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
for
(
int
i
=
0
;
i
<
force
.
getNumParticles
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumParticles
();
i
++
)
{
int
index
=
order
[
i
];
int
index
=
order
[
i
];
...
@@ -2102,11 +2316,23 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2102,11 +2316,23 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
posqd
[
i
].
w
=
charge
;
posqd
[
i
].
w
=
charge
;
else
else
posqf
[
i
].
w
=
(
float
)
charge
;
posqf
[
i
].
w
=
(
float
)
charge
;
sigmaEpsilonVector
[
index
]
=
make_float2
((
float
)
(
0.5
*
sigma
),
(
float
)
(
2.0
*
sqrt
(
epsilon
)));
float
sig
=
(
float
)
(
0.5
*
sigma
);
float
eps
=
(
float
)
(
2.0
*
sqrt
(
epsilon
));
if
(
doLJPME
){
float
C6
=
(
float
)
8.0
*
pow
(
sig
,
3
)
*
eps
;
sumSquaredC6
+=
C6
*
C6
;
sigmaEpsilonC6Vector
[
index
]
=
make_float3
(
sig
,
eps
,
C6
);
}
else
{
sigmaEpsilonVector
[
index
]
=
make_float2
(
sig
,
eps
);
}
sumSquaredCharges
+=
charge
*
charge
;
sumSquaredCharges
+=
charge
*
charge
;
}
}
posq
.
upload
(
cu
.
getPinnedBuffer
());
posq
.
upload
(
cu
.
getPinnedBuffer
());
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
if
(
doLJPME
){
sigmaEpsilon
->
upload
(
sigmaEpsilonC6Vector
);
}
else
{
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
}
// Record the exceptions.
// Record the exceptions.
...
@@ -2125,6 +2351,8 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2125,6 +2351,8 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
if
(
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
)
if
(
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
)
ewaldSelfEnergy
=
(
cu
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
ewaldSelfEnergy
=
(
cu
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
if
(
nonbondedMethod
==
LJPME
)
dispersionSelfEnergy
=
(
cu
.
getContextIndex
()
==
0
?
pow
(
dispersionAlpha
,
6
)
*
sumSquaredC6
/
12.0
:
0
);
if
(
force
.
getUseDispersionCorrection
()
&&
cu
.
getContextIndex
()
==
0
&&
(
nonbondedMethod
==
CutoffPeriodic
||
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
))
if
(
force
.
getUseDispersionCorrection
()
&&
cu
.
getContextIndex
()
==
0
&&
(
nonbondedMethod
==
CutoffPeriodic
||
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
))
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
context
.
getSystem
(),
force
);
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
context
.
getSystem
(),
force
);
cu
.
invalidateMolecules
();
cu
.
invalidateMolecules
();
...
@@ -2143,6 +2371,20 @@ void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
...
@@ -2143,6 +2371,20 @@ void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
}
}
}
void
CudaCalcNonbondedForceKernel
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
if
(
!
doLJPME
)
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using PME"
);
if
(
cu
.
getPlatformData
().
useCpuPme
)
//cpuPme.getAs<CalcPmeReciprocalForceKernel>().getLJPMEParameters(alpha, nx, ny, nz);
throw
OpenMMException
(
"getPMEParametersInContext: CPUPME has not been implemented for LJPME yet."
);
else
{
alpha
=
this
->
dispersionAlpha
;
nx
=
dispersionGridSizeX
;
ny
=
dispersionGridSizeY
;
nz
=
dispersionGridSizeZ
;
}
}
class
CudaCustomNonbondedForceInfo
:
public
CudaForceInfo
{
class
CudaCustomNonbondedForceInfo
:
public
CudaForceInfo
{
public:
public:
CudaCustomNonbondedForceInfo
(
const
CustomNonbondedForce
&
force
)
:
force
(
force
)
{
CudaCustomNonbondedForceInfo
(
const
CustomNonbondedForce
&
force
)
:
force
(
force
)
{
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
a9f65649
...
@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
...
@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
}
void
CudaParallelCalcNonbondedForceKernel
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getLJPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
CudaCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
CudaCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
a9f65649
...
@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
...
@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
deviceName
<<
name
;
deviceName
<<
name
;
}
}
size_t
printfsize
;
cuCtxGetLimit
(
&
printfsize
,
CU_LIMIT_PRINTF_FIFO_SIZE
);
cuCtxSetLimit
(
CU_LIMIT_PRINTF_FIFO_SIZE
,
10
*
printfsize
);
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
a9f65649
...
@@ -17,6 +17,25 @@
...
@@ -17,6 +17,25 @@
const
real
erfcAlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expAlphaRSqr
;
const
real
erfcAlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expAlphaRSqr
;
#endif
#endif
real
tempForce
=
0.0
f
;
real
tempForce
=
0.0
f
;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
real
c6
=
C6s1
*
C6s2
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
#endif
if
(
needCorrection
)
{
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
...
@@ -29,6 +48,13 @@
...
@@ -29,6 +48,13 @@
includeInteraction
=
false
;
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI
*
EWALD_ALPHA
*
138.935456
f
*
posq1
.
w
*
posq2
.
w
;
tempEnergy
-=
TWO_OVER_SQRT_PI
*
EWALD_ALPHA
*
138.935456
f
*
posq1
.
w
*
posq2
.
w
;
}
}
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
#endif
#endif
}
}
else
{
else
{
#if HAS_LENNARD_JONES
#if HAS_LENNARD_JONES
...
@@ -36,7 +62,8 @@
...
@@ -36,7 +62,8 @@
real
sig2
=
invR
*
sig
;
real
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
real
sig6
=
sig2
*
sig2
*
sig2
;
real
sig6
=
sig2
*
sig2
*
sig2
;
real
epssig6
=
sig6
*
(
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
);
real
eps
=
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
;
real
epssig6
=
sig6
*
eps
;
tempForce
=
epssig6
*
(
12.0
f
*
sig6
-
6.0
f
);
tempForce
=
epssig6
*
(
12.0
f
*
sig6
-
6.0
f
);
real
ljEnergy
=
epssig6
*
(
sig6
-
1.0
f
);
real
ljEnergy
=
epssig6
*
(
sig6
-
1.0
f
);
#if USE_LJ_SWITCH
#if USE_LJ_SWITCH
...
@@ -48,6 +75,22 @@
...
@@ -48,6 +75,22 @@
ljEnergy
*=
switchValue
;
ljEnergy
*=
switchValue
;
}
}
#endif
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
#else
...
...
platforms/cuda/src/kernels/ljpme.cu
0 → 100644
View file @
a9f65649
extern
"C"
__global__
void
findAtomDispersionGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// Compute the index of the grid point each atom is associated with.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
DISPERSION_GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
DISPERSION_GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
DISPERSION_GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
DISPERSION_GRID_SIZE_X
,
((
int
)
t
.
y
)
%
DISPERSION_GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
DISPERSION_GRID_SIZE_Z
);
pmeAtomGridIndex
[
i
]
=
make_int2
(
i
,
gridIndex
.
x
*
DISPERSION_GRID_SIZE_Y
*
DISPERSION_GRID_SIZE_Z
+
gridIndex
.
y
*
DISPERSION_GRID_SIZE_Z
+
gridIndex
.
z
);
}
}
extern
"C"
__global__
void
gridSpreadC6
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
// Process the atoms in spatially sorted order. This improves efficiency when writing
// the grid values.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
atom
];
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
DISPERSION_GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
DISPERSION_GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
DISPERSION_GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
DISPERSION_GRID_SIZE_X
,
((
int
)
t
.
y
)
%
DISPERSION_GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
DISPERSION_GRID_SIZE_Z
);
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real3
dr
=
make_real3
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
);
data
[
PME_ORDER
-
1
]
=
make_real3
(
0
);
data
[
1
]
=
dr
;
data
[
0
]
=
make_real3
(
1
)
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
real
div
=
RECIP
(
j
-
1
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
data
[
j
-
k
-
1
]
=
div
*
((
dr
+
make_real3
(
k
))
*
data
[
j
-
k
-
2
]
+
(
make_real3
(
j
-
k
)
-
dr
)
*
data
[
j
-
k
-
1
]);
data
[
0
]
=
div
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
}
data
[
PME_ORDER
-
1
]
=
scale
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
scale
*
((
dr
+
make_real3
(
j
))
*
data
[
PME_ORDER
-
j
-
2
]
+
(
make_real3
(
PME_ORDER
-
j
)
-
dr
)
*
data
[
PME_ORDER
-
j
-
1
]);
data
[
0
]
=
scale
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
// Spread the charge from this atom onto each grid point.
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
xbase
=
xbase
*
DISPERSION_GRID_SIZE_Y
*
DISPERSION_GRID_SIZE_Z
;
real
dx
=
data
[
ix
].
x
;
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
iy
++
)
{
int
ybase
=
gridIndex
.
y
+
iy
;
ybase
-=
(
ybase
>=
DISPERSION_GRID_SIZE_Y
?
DISPERSION_GRID_SIZE_Y
:
0
);
ybase
=
xbase
+
ybase
*
DISPERSION_GRID_SIZE_Z
;
real
dy
=
data
[
iy
].
y
;
for
(
int
iz
=
0
;
iz
<
PME_ORDER
;
iz
++
)
{
int
zindex
=
gridIndex
.
z
+
iz
;
zindex
-=
(
zindex
>=
DISPERSION_GRID_SIZE_Z
?
DISPERSION_GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
// We need to grab the C6 coefficient from the array
real
add
=
C6s
[
atom
]
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
#elif __CUDA_ARCH__ < 200 || defined(USE_DETERMINISTIC_FORCES)
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
int
gridIndex
=
index
;
gridIndex
=
(
gridIndex
%
2
==
0
?
gridIndex
/
2
:
(
gridIndex
+
DISPERSION_GRID_SIZE_X
*
DISPERSION_GRID_SIZE_Y
*
DISPERSION_GRID_SIZE_Z
)
/
2
);
atomicAdd
(
&
ulonglong_p
[
gridIndex
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
#else
atomicAdd
(
&
originalPmeGrid
[
index
],
add
);
#endif
}
}
}
}
}
extern
"C"
__global__
void
finishSpreadC6
(
long
long
*
__restrict__
originalPmeGrid
)
{
real
*
floatGrid
=
(
real
*
)
originalPmeGrid
;
const
unsigned
int
gridSize
=
DISPERSION_GRID_SIZE_X
*
DISPERSION_GRID_SIZE_Y
*
DISPERSION_GRID_SIZE_Z
;
real
scale
=
1.0
f
/
(
real
)
0x100000000
;
#ifdef USE_DOUBLE_PRECISION
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
floatGrid
[
index
]
=
scale
*
originalPmeGrid
[
index
];
#else
for
(
int
index
=
2
*
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
index
<
gridSize
;
index
+=
2
*
blockDim
.
x
*
gridDim
.
x
)
{
floatGrid
[
index
]
=
scale
*
originalPmeGrid
[
index
/
2
];
if
(
index
+
1
<
gridSize
)
floatGrid
[
index
+
1
]
=
scale
*
originalPmeGrid
[(
index
+
gridSize
+
1
)
/
2
];
}
#endif
}
// convolutes the dispersion grid on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern
"C"
__global__
void
reciprocalDispersionConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
real
scaleFactor
=
-
2
*
M_PI
*
SQRT
(
M_PI
)
*
RECIP
(
6
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
alpha
=
EWALD_DISPERSION_ALPHA
;
real
bfac
=
M_PI
/
alpha
;
real
fac1
=
2
*
M_PI
*
M_PI
*
M_PI
*
SQRT
(
M_PI
);
real
fac2
=
alpha
*
alpha
*
alpha
;
real
fac3
=
-
2
*
alpha
*
M_PI
*
M_PI
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// real indices
int
kx
=
index
/
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
));
int
remainder
=
index
-
kx
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
int
ky
=
remainder
/
(
GRID_SIZE_Z
/
2
+
1
);
int
kz
=
remainder
-
ky
*
(
GRID_SIZE_Z
/
2
+
1
);
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
mx
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
mx
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
real
denom
=
scaleFactor
/
(
bx
*
by
*
bz
);
real2
grid
=
halfcomplex_pmeGrid
[
index
];
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
m
=
SQRT
(
m2
);
real
m3
=
m
*
m2
;
real
b
=
bfac
*
m
;
real
expfac
=
-
b
*
b
;
real
expterm
=
EXP
(
expfac
);
#if FAST_ERFC
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7. Stolen by ACS from the CUDA platform's AMOEBA plugin.
real
t
=
1.0
f
/
(
1.0
f
+
0.3275911
f
*
b
);
real
erfcterm
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expterm
;
#else
real
erfcterm
=
ERFC
(
b
);
#endif
real
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
halfcomplex_pmeGrid
[
index
]
=
make_real2
(
grid
.
x
*
eterm
,
grid
.
y
*
eterm
);
}
}
extern
"C"
__global__
void
gridEvaluateDispersionEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
DISPERSION_GRID_SIZE_X
*
DISPERSION_GRID_SIZE_Y
*
DISPERSION_GRID_SIZE_Z
;
const
real
scaleFactor
=
-
2
*
M_PI
*
SQRT
(
M_PI
)
*
RECIP
(
6
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
alpha
=
EWALD_DISPERSION_ALPHA
;
real
bfac
=
M_PI
/
alpha
;
real
fac1
=
2
*
M_PI
*
M_PI
*
M_PI
*
SQRT
(
M_PI
);
real
fac2
=
alpha
*
alpha
*
alpha
;
real
fac3
=
-
2
*
alpha
*
M_PI
*
M_PI
;
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// real indices
int
kx
=
index
/
(
DISPERSION_GRID_SIZE_Y
*
(
DISPERSION_GRID_SIZE_Z
));
int
remainder
=
index
-
kx
*
DISPERSION_GRID_SIZE_Y
*
(
DISPERSION_GRID_SIZE_Z
);
int
ky
=
remainder
/
(
DISPERSION_GRID_SIZE_Z
);
int
kz
=
remainder
-
ky
*
(
DISPERSION_GRID_SIZE_Z
);
int
mx
=
(
kx
<
(
DISPERSION_GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
DISPERSION_GRID_SIZE_X
);
int
my
=
(
ky
<
(
DISPERSION_GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
DISPERSION_GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
DISPERSION_GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
DISPERSION_GRID_SIZE_Z
);
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
mx
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
mx
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
real
denom
=
scaleFactor
/
(
bx
*
by
*
bz
);
real
m
=
SQRT
(
m2
);
real
m3
=
m
*
m2
;
real
b
=
bfac
*
m
;
real
expfac
=
-
b
*
b
;
real
expterm
=
EXP
(
expfac
);
#if FAST_ERFC
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7. Stolen by ACS from the CUDA platform's AMOEBA plugin.
real
t
=
1.0
f
/
(
1.0
f
+
0.3275911
f
*
b
);
real
erfcterm
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expterm
;
#else
real
erfcterm
=
ERFC
(
b
);
#endif
real
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
if
(
kz
>=
(
DISPERSION_GRID_SIZE_Z
/
2
+
1
))
{
kx
=
((
kx
==
0
)
?
kx
:
DISPERSION_GRID_SIZE_X
-
kx
);
ky
=
((
ky
==
0
)
?
ky
:
DISPERSION_GRID_SIZE_Y
-
ky
);
kz
=
DISPERSION_GRID_SIZE_Z
-
kz
;
}
int
indexInHalfComplexGrid
=
kz
+
ky
*
(
DISPERSION_GRID_SIZE_Z
/
2
+
1
)
+
kx
*
(
DISPERSION_GRID_SIZE_Y
*
(
DISPERSION_GRID_SIZE_Z
/
2
+
1
));
real2
grid
=
halfcomplex_pmeGrid
[
indexInHalfComplexGrid
];
// N.B. We inlcude the 0,0,0 point for dispersion
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
}
#ifdef USE_PME_STREAM
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
#else
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
energy
;
#endif
}
extern
"C"
__global__
void
gridInterpolateDispersionForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
C6s
)
{
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
// Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
DISPERSION_GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
DISPERSION_GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
DISPERSION_GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
DISPERSION_GRID_SIZE_X
,
((
int
)
t
.
y
)
%
DISPERSION_GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
DISPERSION_GRID_SIZE_Z
);
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real3
dr
=
make_real3
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
);
data
[
PME_ORDER
-
1
]
=
make_real3
(
0
);
data
[
1
]
=
dr
;
data
[
0
]
=
make_real3
(
1
)
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
real
div
=
RECIP
(
j
-
1
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
data
[
j
-
k
-
1
]
=
div
*
((
dr
+
make_real3
(
k
))
*
data
[
j
-
k
-
2
]
+
(
make_real3
(
j
-
k
)
-
dr
)
*
data
[
j
-
k
-
1
]);
data
[
0
]
=
div
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
}
ddata
[
0
]
=
-
data
[
0
];
for
(
int
j
=
1
;
j
<
PME_ORDER
;
j
++
)
ddata
[
j
]
=
data
[
j
-
1
]
-
data
[
j
];
data
[
PME_ORDER
-
1
]
=
scale
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
scale
*
((
dr
+
make_real3
(
j
))
*
data
[
PME_ORDER
-
j
-
2
]
+
(
make_real3
(
PME_ORDER
-
j
)
-
dr
)
*
data
[
PME_ORDER
-
j
-
1
]);
data
[
0
]
=
scale
*
(
make_real3
(
1
)
-
dr
)
*
data
[
0
];
// Compute the force on this atom.
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
DISPERSION_GRID_SIZE_X
?
DISPERSION_GRID_SIZE_X
:
0
);
xbase
=
xbase
*
DISPERSION_GRID_SIZE_Y
*
DISPERSION_GRID_SIZE_Z
;
real
dx
=
data
[
ix
].
x
;
real
ddx
=
ddata
[
ix
].
x
;
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
iy
++
)
{
int
ybase
=
gridIndex
.
y
+
iy
;
ybase
-=
(
ybase
>=
DISPERSION_GRID_SIZE_Y
?
DISPERSION_GRID_SIZE_Y
:
0
);
ybase
=
xbase
+
ybase
*
DISPERSION_GRID_SIZE_Z
;
real
dy
=
data
[
iy
].
y
;
real
ddy
=
ddata
[
iy
].
y
;
for
(
int
iz
=
0
;
iz
<
PME_ORDER
;
iz
++
)
{
int
zindex
=
gridIndex
.
z
+
iz
;
zindex
-=
(
zindex
>=
DISPERSION_GRID_SIZE_Z
?
DISPERSION_GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
real
gridvalue
=
originalPmeGrid
[
index
];
force
.
x
+=
ddx
*
dy
*
data
[
iz
].
z
*
gridvalue
;
force
.
y
+=
dx
*
ddy
*
data
[
iz
].
z
*
gridvalue
;
force
.
z
+=
dx
*
dy
*
ddata
[
iz
].
z
*
gridvalue
;
}
}
}
real
q
=
C6s
[
atom
];
real
forceX
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
DISPERSION_GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
DISPERSION_GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
DISPERSION_GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
atomicAdd
(
&
forceBuffers
[
atom
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceX
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceY
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceZ
*
0x100000000
)));
}
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment