Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
53f770f4
Commit
53f770f4
authored
Feb 21, 2017
by
peastman
Committed by
GitHub
Feb 21, 2017
Browse files
Merge pull request #1557 from andysim/dpme
Dispersion PME
parents
f36f36bf
c0850062
Changes
62
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1286 additions
and
357 deletions
+1286
-357
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+4
-0
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+4
-0
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+45
-1
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+67
-9
platforms/cuda/tests/TestCudaDispersionPME.cpp
platforms/cuda/tests/TestCudaDispersionPME.cpp
+36
-0
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+28
-6
platforms/opencl/include/OpenCLParallelKernels.h
platforms/opencl/include/OpenCLParallelKernels.h
+10
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+312
-73
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+4
-0
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+45
-1
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+94
-11
platforms/opencl/tests/TestOpenCLDispersionPME.cpp
platforms/opencl/tests/TestOpenCLDispersionPME.cpp
+36
-0
platforms/reference/include/ReferenceKernels.h
platforms/reference/include/ReferenceKernels.h
+11
-2
platforms/reference/include/ReferenceLJCoulombIxn.h
platforms/reference/include/ReferenceLJCoulombIxn.h
+20
-8
platforms/reference/include/ReferencePME.h
platforms/reference/include/ReferencePME.h
+23
-1
platforms/reference/src/ReferenceKernels.cpp
platforms/reference/src/ReferenceKernels.cpp
+27
-5
platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
...ms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
+340
-238
platforms/reference/src/SimTKReference/ReferencePME.cpp
platforms/reference/src/SimTKReference/ReferencePME.cpp
+143
-0
platforms/reference/tests/TestReferenceDispersionPME.cpp
platforms/reference/tests/TestReferenceDispersionPME.cpp
+36
-0
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+1
-1
No files found.
platforms/cuda/src/CudaParallelKernels.cpp
View file @
53f770f4
...
...
@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
void
CudaParallelCalcNonbondedForceKernel
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getLJPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
Task
(
ContextImpl
&
context
,
CudaCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
53f770f4
...
...
@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT
(
cuDeviceGetName
(
name
,
1000
,
contexts
[
i
]
->
getDevice
()),
"Error querying device name"
);
deviceName
<<
name
;
}
size_t
printfsize
;
cuCtxGetLimit
(
&
printfsize
,
CU_LIMIT_PRINTF_FIFO_SIZE
);
cuCtxSetLimit
(
CU_LIMIT_PRINTF_FIFO_SIZE
,
10
*
printfsize
);
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
53f770f4
...
...
@@ -17,6 +17,26 @@
const
real
erfcAlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
expAlphaRSqr
;
#endif
real
tempForce
=
0.0
f
;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA
*
r
;
const
real
dar2
=
dispersionAlphaR
*
dispersionAlphaR
;
const
real
dar4
=
dar2
*
dar2
;
const
real
dar6
=
dar4
*
dar2
;
const
real
invR2
=
invR
*
invR
;
const
real
expDar2
=
EXP
(
-
dar2
);
const
float2
sigExpProd
=
sigmaEpsilon1
*
sigmaEpsilon2
;
const
real
c6
=
64
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
x
*
sigExpProd
.
y
;
const
real
coef
=
invR2
*
invR2
*
invR2
*
c6
;
const
real
eprefac
=
1.0
f
+
dar2
+
0.5
f
*
dar4
;
const
real
dprefac
=
eprefac
+
dar6
/
6.0
f
;
#endif
#endif
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
...
...
@@ -29,6 +49,13 @@
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI
*
EWALD_ALPHA
*
138.935456
f
*
posq1
.
w
*
posq2
.
w
;
}
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
#endif
#endif
}
else
{
#if HAS_LENNARD_JONES
...
...
@@ -36,7 +63,8 @@
real
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
real
sig6
=
sig2
*
sig2
*
sig2
;
real
epssig6
=
sig6
*
(
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
);
real
eps
=
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
;
real
epssig6
=
sig6
*
eps
;
tempForce
=
epssig6
*
(
12.0
f
*
sig6
-
6.0
f
);
real
ljEnergy
=
epssig6
*
(
sig6
-
1.0
f
);
#if USE_LJ_SWITCH
...
...
@@ -48,6 +76,22 @@
ljEnergy
*=
switchValue
;
}
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy
+=
coef
*
(
1.0
f
-
expDar2
*
eprefac
);
tempForce
+=
6.0
f
*
coef
*
(
1.0
f
-
expDar2
*
dprefac
);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2
=
sig
*
sig
;
sig6
=
sig2
*
sig2
*
sig2
*
INVCUT6
;
epssig6
=
eps
*
sig6
;
// The additive part of the potential shift
ljEnergy
+=
epssig6
*
(
1.0
f
-
sig6
);
// The multiplicative part of the potential shift
ljEnergy
+=
MULTSHIFT6
*
c6
;
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
...
...
platforms/cuda/src/kernels/pme.cu
View file @
53f770f4
...
...
@@ -21,7 +21,11 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
#ifdef USE_LJPME
,
const
float2
*
__restrict__
sigmaEpsilon
#endif
)
{
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
...
@@ -63,6 +67,12 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
charge
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
const
real
charge
=
pos
.
w
;
#endif
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xbase
=
gridIndex
.
x
+
ix
;
xbase
-=
(
xbase
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
);
...
...
@@ -80,7 +90,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
real
add
=
pos
.
w
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
charge
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
...
@@ -121,7 +131,15 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
#ifdef USE_LJPME
const
real
recipScaleFactor
=
-
2
*
M_PI
*
SQRT
(
M_PI
)
*
RECIP
(
6
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
real
bfac
=
M_PI
/
EWALD_ALPHA
;
real
fac1
=
2
*
M_PI
*
M_PI
*
M_PI
*
SQRT
(
M_PI
);
real
fac2
=
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
fac3
=
-
2
*
EWALD_ALPHA
*
M_PI
*
M_PI
;
#else
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
#endif
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// real indices
...
...
@@ -140,12 +158,23 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real
bz
=
pmeBsplineModuliZ
[
kz
];
real2
grid
=
halfcomplex_pmeGrid
[
index
];
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
#ifdef USE_LJPME
real
denom
=
recipScaleFactor
/
(
bx
*
by
*
bz
);
real
m
=
SQRT
(
m2
);
real
m3
=
m
*
m2
;
real
b
=
bfac
*
m
;
real
expfac
=
-
b
*
b
;
real
expterm
=
EXP
(
expfac
);
real
erfcterm
=
ERFC
(
b
);
real
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
halfcomplex_pmeGrid
[
index
]
=
make_real2
(
grid
.
x
*
eterm
,
grid
.
y
*
eterm
);
#else
real
denom
=
m2
*
bx
*
by
*
bz
;
real
eterm
=
recipScaleFactor
*
EXP
(
-
RECIP_EXP_FACTOR
*
m2
)
/
denom
;
if
(
kx
!=
0
||
ky
!=
0
||
kz
!=
0
)
{
halfcomplex_pmeGrid
[
index
]
=
make_real2
(
grid
.
x
*
eterm
,
grid
.
y
*
eterm
);
}
#endif
}
}
...
...
@@ -156,7 +185,15 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
#ifdef USE_LJPME
const
real
recipScaleFactor
=
-
2
*
M_PI
*
SQRT
(
M_PI
)
*
RECIP
(
6
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
real
bfac
=
M_PI
/
EWALD_ALPHA
;
real
fac1
=
2
*
M_PI
*
M_PI
*
M_PI
*
SQRT
(
M_PI
);
real
fac2
=
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
fac3
=
-
2
*
EWALD_ALPHA
*
M_PI
*
M_PI
;
#else
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
#endif
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
...
@@ -175,8 +212,19 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
#ifdef USE_LJPME
real
denom
=
recipScaleFactor
/
(
bx
*
by
*
bz
);
real
m
=
SQRT
(
m2
);
real
m3
=
m
*
m2
;
real
b
=
bfac
*
m
;
real
expfac
=
-
b
*
b
;
real
expterm
=
EXP
(
expfac
);
real
erfcterm
=
ERFC
(
b
);
real
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
#else
real
denom
=
m2
*
bx
*
by
*
bz
;
real
eterm
=
recipScaleFactor
*
EXP
(
-
RECIP_EXP_FACTOR
*
m2
)
/
denom
;
#endif
if
(
kz
>=
(
GRID_SIZE_Z
/
2
+
1
))
{
kx
=
((
kx
==
0
)
?
kx
:
GRID_SIZE_X
-
kx
);
...
...
@@ -185,11 +233,12 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
}
int
indexInHalfComplexGrid
=
kz
+
ky
*
(
GRID_SIZE_Z
/
2
+
1
)
+
kx
*
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
));
real2
grid
=
halfcomplex_pmeGrid
[
indexInHalfComplexGrid
];
if
(
kx
!=
0
||
ky
!=
0
||
kz
!=
0
)
{
#ifndef USE_LJPME
if
(
kx
!=
0
||
ky
!=
0
||
kz
!=
0
)
#endif
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
}
}
#ifdef USE_PME_STREAM
#if defined(USE_PME_STREAM) && !defined(USE_LJPME)
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
#else
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
energy
;
...
...
@@ -199,7 +248,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern
"C"
__global__
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
#ifdef USE_LJPME
,
const
float2
*
__restrict__
sigmaEpsilon
#endif
)
{
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
...
@@ -271,7 +324,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
}
#ifdef USE_LJPME
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
real
q
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
#endif
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
...
...
platforms/cuda/tests/TestCudaDispersionPME.cpp
0 → 100644
View file @
53f770f4
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestDispersionPME.h"
void
runPlatformTests
()
{
}
platforms/opencl/include/OpenCLKernels.h
View file @
53f770f4
...
...
@@ -591,8 +591,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
@@ -629,6 +630,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
class
SortTrait
:
public
OpenCLSort
::
SortTrait
{
int
getDataSize
()
const
{
return
8
;}
...
...
@@ -657,6 +667,9 @@ private:
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
*
pmeDispersionBsplineModuliX
;
OpenCLArray
*
pmeDispersionBsplineModuliY
;
OpenCLArray
*
pmeDispersionBsplineModuliZ
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
...
...
@@ -665,25 +678,34 @@ private:
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
dispersionFft
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
SyncQueuePostComputation
*
syncQueue
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeDispersionAtomRangeKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeDispersionZIndexKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeDispersionUpdateBsplinesKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeDispersionFinishSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeDispersionConvolutionKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeDispersionEvalEnergyKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeDispersionInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
,
dispersionAlpha
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
;
int
dispersionGridSizeX
,
dispersionGridSizeY
,
dispersionGridSizeZ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
,
doLJPME
;
NonbondedMethod
nonbondedMethod
;
static
const
int
PmeOrder
=
5
;
};
...
...
platforms/opencl/include/OpenCLParallelKernels.h
View file @
53f770f4
...
...
@@ -438,6 +438,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
class
Task
;
OpenCLPlatform
::
PlatformData
&
data
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
53f770f4
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
53f770f4
...
...
@@ -583,6 +583,10 @@ void OpenCLParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int
dynamic_cast
<
const
OpenCLCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
void
OpenCLParallelCalcNonbondedForceKernel
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
OpenCLCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getLJPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
class
OpenCLParallelCalcCustomNonbondedForceKernel
::
Task
:
public
OpenCLContext
::
WorkTask
{
public:
Task
(
ContextImpl
&
context
,
OpenCLCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
53f770f4
...
...
@@ -22,6 +22,26 @@
const
real
erfcAlphaR
=
(
0.254829592f+
(
-0.284496736f+
(
1.421413741f+
(
-1.453152027f+1.061405429f*t
)
*t
)
*t
)
*t
)
*t*
expAlphaRSqr
;
#
endif
real
tempForce
=
0
;
#
if
HAS_LENNARD_JONES
//
The
multiplicative
term
to
correct
for
the
multiplicative
terms
that
are
always
//
present
in
reciprocal
space.
The
real
terms
have
an
additive
contribution
//
added
in,
but
for
excluded
terms
the
multiplicative
term
is
just
subtracted.
//
These
factors
are
needed
in
both
clauses
of
the
needCorrection
statement,
so
//
I
declare
them
up
here.
#
if
DO_LJPME
const
real
dispersionAlphaR
=
EWALD_DISPERSION_ALPHA*r
;
const
real
dar2
=
dispersionAlphaR*dispersionAlphaR
;
const
real
dar4
=
dar2*dar2
;
const
real
dar6
=
dar4*dar2
;
const
real
invR2
=
invR*invR
;
const
real
expDar2
=
EXP
(
-dar2
)
;
const
float2
sigExpProd
=
sigmaEpsilon1*sigmaEpsilon2
;
const
real
c6
=
64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y
;
const
real
coef
=
invR2*invR2*invR2*c6
;
const
real
eprefac
=
1.0f
+
dar2
+
0.5f*dar4
;
const
real
dprefac
=
eprefac
+
dar6/6.0f
;
#
endif
#
endif
if
(
needCorrection
)
{
//
Subtract
off
the
part
of
this
interaction
that
was
included
in
the
reciprocal
space
contribution.
...
...
@@ -34,6 +54,13 @@
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
}
#
if
HAS_LENNARD_JONES
#
if
DO_LJPME
//
The
multiplicative
grid
term
tempEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
#
endif
#
endif
}
else
{
#
if
HAS_LENNARD_JONES
...
...
@@ -41,7 +68,8 @@
real
sig2
=
invR*sig
;
sig2
*=
sig2
;
real
sig6
=
sig2*sig2*sig2
;
real
epssig6
=
sig6*
(
sigmaEpsilon1.y*sigmaEpsilon2.y
)
;
real
eps
=
sigmaEpsilon1.y*sigmaEpsilon2.y
;
real
epssig6
=
sig6*eps
;
tempForce
=
epssig6*
(
12.0f*sig6
-
6.0f
)
;
real
ljEnergy
=
epssig6*
(
sig6
-
1.0f
)
;
#
if
USE_LJ_SWITCH
...
...
@@ -53,6 +81,22 @@
ljEnergy
*=
switchValue
;
}
#
endif
#
if
DO_LJPME
//
The
multiplicative
grid
term
ljEnergy
+=
coef*
(
1.0f
-
expDar2*eprefac
)
;
tempForce
+=
6.0f*coef*
(
1.0f
-
expDar2*dprefac
)
;
//
The
potential
shift
accounts
for
the
step
at
the
cutoff
introduced
by
the
//
transition
from
additive
to
multiplicative
combintion
rules
and
is
only
//
needed
for
the
real
(
not
excluded
)
terms.
By
addin
these
terms
to
ljEnergy
//
instead
of
tempEnergy
here,
the
includeInteraction
mask
is
correctly
applied.
sig2
=
sig*sig
;
sig6
=
sig2*sig2*sig2*INVCUT6
;
epssig6
=
eps*sig6
;
//
The
additive
part
of
the
potential
shift
ljEnergy
+=
epssig6*
(
1.0f
-
sig6
)
;
//
The
multiplicative
part
of
the
potential
shift
ljEnergy
+=
MULTSHIFT6*c6
;
#
endif
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR,
includeInteraction
)
;
#
else
...
...
platforms/opencl/src/kernels/pme.cl
View file @
53f770f4
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
...
...
@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
real4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
real4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j].w
=
pos.w
; // Storing the charge here improves cache coherency in the charge spreading kernel
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
data[j].w
=
charge
; // Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
}
#
endif
...
...
@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
...
...
@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
...
...
@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
real
add
=
pos.w
*data[ix].x*data[iy].y*data[iz].z
;
real
add
=
charge
*data[ix].x*data[iy].y*data[iz].z
;
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
//
On
Nvidia
devices
(
at
least
Maxwell
anyway
)
,
this
split
ordering
produces
much
higher
performance.
Why?
//
I
have
no
idea!
And
of
course
on
AMD
it
produces
slower
performance.
GPUs
are
not
meant
to
be
understood.
...
...
@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
...
...
@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
//
Spread
the
charge
from
this
atom
onto
each
grid
point.
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
const
real
charge
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
const
real
charge
=
pos.w
;
#
endif
bool
hasComputedThetas
=
false
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
...
...
@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
pmeGrid[index] += EPSILON_FACTOR*
pos.w
*data[ix].x*data[iy].y*data[iz].z;
pmeGrid[index] += EPSILON_FACTOR*
charge
*data[ix].x*data[iy].y*data[iz].z;
}
}
}
...
...
@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
}
#else
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta) {
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
// Compute the charge on a grid point.
...
...
@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
...
...
@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
real bz = pmeBsplineModuliZ[kz];
real2 grid = pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
}
#endif
}
}
...
...
@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
...
...
@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
...
...
@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
{
#ifndef USE_LJPME
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
#
endif
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
#
ifdef
USE_PME_STREAM
#
if
defined
(
USE_PME_STREAM
)
&&
!defined
(
USE_LJPME
)
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
#
else
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
...
...
@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
#
ifdef
USE_LJPME
,
__global
const
float2*
restrict
sigmaEpsilon
#
endif
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
...
@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
}
}
real4
totalForce
=
forceBuffers[atom]
;
#
ifdef
USE_LJPME
const
float2
sigEps
=
sigmaEpsilon[atom]
;
real
q
=
8*sigEps.x*sigEps.x*sigEps.x*sigEps.y
;
#
else
real
q
=
pos.w*EPSILON_FACTOR
;
#
endif
totalForce.x
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecX.x
)
;
totalForce.y
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y
)
;
totalForce.z
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z
)
;
...
...
platforms/opencl/tests/TestOpenCLDispersionPME.cpp
0 → 100644
View file @
53f770f4
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestDispersionPME.h"
void
runPlatformTests
()
{
}
platforms/reference/include/ReferenceKernels.h
View file @
53f770f4
...
...
@@ -604,12 +604,21 @@ public:
* @param nz the number of grid points along the Z axis
*/
void
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
;
private:
int
numParticles
,
num14
;
int
**
bonded14IndexArray
;
RealOpenMM
**
particleParamArray
,
**
bonded14ParamArray
;
RealOpenMM
nonbondedCutoff
,
switchingDistance
,
rfDielectric
,
ewaldAlpha
,
dispersionCoefficient
;
int
kmax
[
3
],
gridSize
[
3
];
RealOpenMM
nonbondedCutoff
,
switchingDistance
,
rfDielectric
,
ewaldAlpha
,
ewaldDispersionAlpha
,
dispersionCoefficient
;
int
kmax
[
3
],
gridSize
[
3
]
,
dispersionGridSize
[
3
]
;
bool
useSwitchingFunction
;
std
::
vector
<
std
::
set
<
int
>
>
exclusions
;
NonbondedMethod
nonbondedMethod
;
...
...
platforms/reference/include/ReferenceLJCoulombIxn.h
View file @
53f770f4
...
...
@@ -38,14 +38,14 @@ class ReferenceLJCoulombIxn {
bool
useSwitch
;
bool
periodic
;
bool
ewald
;
bool
pme
;
bool
pme
,
ljpme
;
const
OpenMM
::
NeighborList
*
neighborList
;
OpenMM
::
RealVec
periodicBoxVectors
[
3
];
RealOpenMM
cutoffDistance
,
switchingDistance
;
RealOpenMM
krf
,
crf
;
RealOpenMM
alphaEwald
;
RealOpenMM
alphaEwald
,
alphaDispersionEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
int
meshDim
[
3
]
,
dispersionMeshDim
[
3
]
;
// parameter indices
...
...
@@ -149,6 +149,18 @@ class ReferenceLJCoulombIxn {
void
setUsePME
(
RealOpenMM
alpha
,
int
meshSize
[
3
]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param dalpha the dispersion Ewald separation parameter
@param dgridSize the dimensions of the dispersion mesh
--------------------------------------------------------------------------------------- */
void
setUseLJPME
(
RealOpenMM
dalpha
,
int
dmeshSize
[
3
]);
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn
...
...
platforms/reference/include/ReferencePME.h
View file @
53f770f4
...
...
@@ -87,6 +87,28 @@ pme_exec(pme_t pme,
RealOpenMM
*
energy
);
/*
* Evaluate reciprocal space PME dispersion energy and forces.
*
* Args:
*
* pme Opaque pme_t object, must have been initialized with pme_init()
* x Pointer to coordinate data array (nm)
* f Pointer to force data array (will be written as kJ/mol/nm)
* c6s Array of c6 coefficients (units of sqrt(kJ/mol).nm^3 )
* box Simulation cell dimensions (nm)
* energy Total energy (will be written in units of kJ/mol)
*/
int
OPENMM_EXPORT
pme_exec_dpme
(
pme_t
pme
,
const
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
const
std
::
vector
<
RealOpenMM
>&
c6s
,
const
OpenMM
::
RealVec
periodicBoxVectors
[
3
],
RealOpenMM
*
energy
);
/* Release all memory in pme structure */
int
OPENMM_EXPORT
...
...
platforms/reference/src/ReferenceKernels.cpp
View file @
53f770f4
...
...
@@ -969,9 +969,17 @@ void ReferenceCalcNonbondedForceKernel::initialize(const System& system, const N
}
else
if
(
nonbondedMethod
==
PME
)
{
double
alpha
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
]);
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
]
,
false
);
ewaldAlpha
=
(
RealOpenMM
)
alpha
;
}
else
if
(
nonbondedMethod
==
LJPME
)
{
double
alpha
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
],
false
);
ewaldAlpha
=
(
RealOpenMM
)
alpha
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
dispersionGridSize
[
0
],
dispersionGridSize
[
1
],
dispersionGridSize
[
2
],
true
);
ewaldDispersionAlpha
=
(
RealOpenMM
)
alpha
;
useSwitchingFunction
=
false
;
}
rfDielectric
=
(
RealOpenMM
)
force
.
getReactionFieldDielectric
();
if
(
force
.
getUseDispersionCorrection
())
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
...
...
@@ -987,11 +995,12 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc
bool
periodic
=
(
nonbondedMethod
==
CutoffPeriodic
);
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
pme
=
(
nonbondedMethod
==
PME
);
bool
ljpme
=
(
nonbondedMethod
==
LJPME
);
if
(
nonbondedMethod
!=
NoCutoff
)
{
computeNeighborListVoxelHash
(
*
neighborList
,
numParticles
,
posData
,
exclusions
,
extractBoxVectors
(
context
),
periodic
||
ewald
||
pme
,
nonbondedCutoff
,
0.0
);
computeNeighborListVoxelHash
(
*
neighborList
,
numParticles
,
posData
,
exclusions
,
extractBoxVectors
(
context
),
periodic
||
ewald
||
pme
||
ljpme
,
nonbondedCutoff
,
0.0
);
clj
.
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
,
rfDielectric
);
}
if
(
periodic
||
ewald
||
pme
)
{
if
(
periodic
||
ewald
||
pme
||
ljpme
)
{
RealVec
*
boxVectors
=
extractBoxVectors
(
context
);
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
if
(
boxVectors
[
0
][
0
]
<
minAllowedSize
||
boxVectors
[
1
][
1
]
<
minAllowedSize
||
boxVectors
[
2
][
2
]
<
minAllowedSize
)
...
...
@@ -1002,6 +1011,10 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc
clj
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
if
(
pme
)
clj
.
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
ljpme
){
clj
.
setUsePME
(
ewaldAlpha
,
gridSize
);
clj
.
setUseLJPME
(
ewaldDispersionAlpha
,
dispersionGridSize
);
}
if
(
useSwitchingFunction
)
clj
.
setUseSwitchingFunction
(
switchingDistance
);
clj
.
calculatePairIxn
(
numParticles
,
posData
,
particleParamArray
,
exclusions
,
0
,
forceData
,
0
,
includeEnergy
?
&
energy
:
NULL
,
includeDirect
,
includeReciprocal
);
...
...
@@ -1059,14 +1072,23 @@ void ReferenceCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& con
}
void
ReferenceCalcNonbondedForceKernel
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
if
(
nonbondedMethod
!=
PME
)
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using PME"
);
if
(
nonbondedMethod
!=
PME
&&
nonbondedMethod
!=
LJPME
)
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using PME
or LJPME
"
);
alpha
=
ewaldAlpha
;
nx
=
gridSize
[
0
];
ny
=
gridSize
[
1
];
nz
=
gridSize
[
2
];
}
void
ReferenceCalcNonbondedForceKernel
::
getLJPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
if
(
nonbondedMethod
!=
LJPME
)
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using LJPME"
);
alpha
=
ewaldDispersionAlpha
;
nx
=
dispersionGridSize
[
0
];
ny
=
dispersionGridSize
[
1
];
nz
=
dispersionGridSize
[
2
];
}
ReferenceCalcCustomNonbondedForceKernel
::~
ReferenceCalcCustomNonbondedForceKernel
()
{
disposeRealArray
(
particleParamArray
,
numParticles
);
if
(
neighborList
!=
NULL
)
...
...
platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
View file @
53f770f4
...
...
@@ -26,6 +26,7 @@
#include <sstream>
#include <complex>
#include <algorithm>
#include <iostream>
#include "SimTKOpenMMUtilities.h"
#include "ReferenceLJCoulombIxn.h"
...
...
@@ -47,7 +48,7 @@ using namespace OpenMM;
--------------------------------------------------------------------------------------- */
ReferenceLJCoulombIxn
::
ReferenceLJCoulombIxn
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
{
ReferenceLJCoulombIxn
::
ReferenceLJCoulombIxn
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
,
ljpme
(
false
)
{
// ---------------------------------------------------------------------------------------
...
...
@@ -73,7 +74,7 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() {
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
...
...
@@ -83,14 +84,14 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() {
--------------------------------------------------------------------------------------- */
void
ReferenceLJCoulombIxn
::
setUseCutoff
(
RealOpenMM
distance
,
const
OpenMM
::
NeighborList
&
neighbors
,
RealOpenMM
solventDielectric
)
{
void
ReferenceLJCoulombIxn
::
setUseCutoff
(
RealOpenMM
distance
,
const
OpenMM
::
NeighborList
&
neighbors
,
RealOpenMM
solventDielectric
)
{
cutoff
=
true
;
cutoffDistance
=
distance
;
neighborList
=
&
neighbors
;
krf
=
pow
(
cutoffDistance
,
-
3.0
)
*
(
solventDielectric
-
1.0
)
/
(
2.0
*
solventDielectric
+
1.0
);
crf
=
(
1.0
/
cutoffDistance
)
*
(
3.0
*
solventDielectric
)
/
(
2.0
*
solventDielectric
+
1.0
);
}
}
/**---------------------------------------------------------------------------------------
...
...
@@ -105,7 +106,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
switchingDistance
=
distance
;
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
...
...
@@ -115,7 +116,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */
void
ReferenceLJCoulombIxn
::
setPeriodic
(
OpenMM
::
RealVec
*
vectors
)
{
void
ReferenceLJCoulombIxn
::
setPeriodic
(
OpenMM
::
RealVec
*
vectors
)
{
assert
(
cutoff
);
assert
(
vectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
...
...
@@ -125,9 +126,9 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
periodicBoxVectors
[
0
]
=
vectors
[
0
];
periodicBoxVectors
[
1
]
=
vectors
[
1
];
periodicBoxVectors
[
2
]
=
vectors
[
2
];
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
...
...
@@ -138,15 +139,15 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */
void
ReferenceLJCoulombIxn
::
setUseEwald
(
RealOpenMM
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
void
ReferenceLJCoulombIxn
::
setUseEwald
(
RealOpenMM
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
alphaEwald
=
alpha
;
numRx
=
kmaxx
;
numRy
=
kmaxy
;
numRz
=
kmaxz
;
ewald
=
true
;
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
...
...
@@ -155,13 +156,30 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */
void
ReferenceLJCoulombIxn
::
setUsePME
(
RealOpenMM
alpha
,
int
meshSize
[
3
])
{
void
ReferenceLJCoulombIxn
::
setUsePME
(
RealOpenMM
alpha
,
int
meshSize
[
3
])
{
alphaEwald
=
alpha
;
meshDim
[
0
]
=
meshSize
[
0
];
meshDim
[
1
]
=
meshSize
[
1
];
meshDim
[
2
]
=
meshSize
[
2
];
pme
=
true
;
}
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion terms.
@param alpha the dispersion Ewald separation parameter
@param gridSize the dimensions of the dispersion mesh
--------------------------------------------------------------------------------------- */
void
ReferenceLJCoulombIxn
::
setUseLJPME
(
RealOpenMM
alpha
,
int
meshSize
[
3
])
{
alphaDispersionEwald
=
alpha
;
dispersionMeshDim
[
0
]
=
meshSize
[
0
];
dispersionMeshDim
[
1
]
=
meshSize
[
1
];
dispersionMeshDim
[
2
]
=
meshSize
[
2
];
ljpme
=
true
;
}
/**---------------------------------------------------------------------------------------
...
...
@@ -201,16 +219,27 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
RealOpenMM
totalSelfEwaldEnergy
=
0.0
;
RealOpenMM
realSpaceEwaldEnergy
=
0.0
;
RealOpenMM
recipEnergy
=
0.0
;
RealOpenMM
recipDispersionEnergy
=
0.0
;
RealOpenMM
totalRecipEnergy
=
0.0
;
RealOpenMM
vdwEnergy
=
0.0
;
// **************************************************************************************
// SELF ENERGY
// **************************************************************************************
// A couple of sanity checks for
if
(
ljpme
&&
useSwitch
)
throw
OpenMMException
(
"Switching cannot be used with LJPME"
);
if
(
ljpme
&&
!
pme
)
throw
OpenMMException
(
"LJPME has been set, without PME being set"
);
// **************************************************************************************
// SELF ENERGY
// **************************************************************************************
if
(
includeReciprocal
)
{
for
(
int
atomID
=
0
;
atomID
<
numberOfAtoms
;
atomID
++
)
{
RealOpenMM
selfEwaldEnergy
=
(
RealOpenMM
)
(
ONE_4PI_EPS0
*
atomParameters
[
atomID
][
QIndex
]
*
atomParameters
[
atomID
][
QIndex
]
*
alphaEwald
/
SQRT_PI
);
if
(
ljpme
)
{
// Dispersion self term
selfEwaldEnergy
-=
pow
(
alphaDispersionEwald
,
6.0
)
*
64.0
*
pow
(
atomParameters
[
atomID
][
SigIndex
],
6.0
)
*
pow
(
atomParameters
[
atomID
][
EpsIndex
],
2.0
)
/
12.0
;
}
totalSelfEwaldEnergy
-=
selfEwaldEnergy
;
if
(
energyByAtom
)
{
energyByAtom
[
atomID
]
-=
selfEwaldEnergy
;
...
...
@@ -222,9 +251,9 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
*
totalEnergy
+=
totalSelfEwaldEnergy
;
}
// **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
// **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
// PME
if
(
pme
&&
includeReciprocal
)
{
...
...
@@ -245,8 +274,31 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
energyByAtom
[
n
]
+=
recipEnergy
;
pme_destroy
(
pmedata
);
if
(
ljpme
)
{
// Dispersion reciprocal space terms
pme_init
(
&
pmedata
,
alphaDispersionEwald
,
numberOfAtoms
,
dispersionMeshDim
,
5
,
1
);
std
::
vector
<
RealVec
>
dpmeforces
;
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
){
charges
[
i
]
=
8.0
*
pow
(
atomParameters
[
i
][
SigIndex
],
3.0
)
*
atomParameters
[
i
][
EpsIndex
];
dpmeforces
.
push_back
(
RealVec
());
}
pme_exec_dpme
(
pmedata
,
atomCoordinates
,
dpmeforces
,
charges
,
periodicBoxVectors
,
&
recipDispersionEnergy
);
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
){
forces
[
i
][
0
]
-=
2.0
*
dpmeforces
[
i
][
0
];
forces
[
i
][
1
]
-=
2.0
*
dpmeforces
[
i
][
1
];
forces
[
i
][
2
]
-=
2.0
*
dpmeforces
[
i
][
2
];
}
if
(
totalEnergy
)
*
totalEnergy
+=
recipDispersionEnergy
;
if
(
energyByAtom
)
for
(
int
n
=
0
;
n
<
numberOfAtoms
;
n
++
)
energyByAtom
[
n
]
+=
recipDispersionEnergy
;
pme_destroy
(
pmedata
);
}
}
// Ewald method
else
if
(
ewald
&&
includeReciprocal
)
{
...
...
@@ -258,7 +310,7 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector
<
d_complex
>
eir
(
kmax
*
numberOfAtoms
*
3
);
vector
<
d_complex
>
tab_xy
(
numberOfAtoms
);
vector
<
d_complex
>
tab_qxyz
(
numberOfAtoms
);
...
...
@@ -350,15 +402,16 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
}
}
// **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES
// **************************************************************************************
// **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES
// **************************************************************************************
if
(
!
includeDirect
)
return
;
RealOpenMM
totalVdwEnergy
=
0.0
f
;
RealOpenMM
totalRealSpaceEwaldEnergy
=
0.0
f
;
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
->
size
();
i
++
)
{
OpenMM
::
AtomPair
pair
=
(
*
neighborList
)[
i
];
int
ii
=
pair
.
first
;
...
...
@@ -387,6 +440,37 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
RealOpenMM
eps
=
atomParameters
[
ii
][
EpsIndex
]
*
atomParameters
[
jj
][
EpsIndex
];
dEdR
+=
switchValue
*
eps
*
(
twelve
*
sig6
-
six
)
*
sig6
*
inverseR
*
inverseR
;
vdwEnergy
=
eps
*
(
sig6
-
one
)
*
sig6
;
if
(
ljpme
)
{
RealOpenMM
dalphaR
=
alphaDispersionEwald
*
r
;
RealOpenMM
dar2
=
dalphaR
*
dalphaR
;
RealOpenMM
dar4
=
dar2
*
dar2
;
RealOpenMM
dar6
=
dar4
*
dar2
;
RealOpenMM
inverseR2
=
inverseR
*
inverseR
;
RealOpenMM
c6i
=
8.0
*
pow
(
atomParameters
[
ii
][
SigIndex
],
3.0
)
*
atomParameters
[
ii
][
EpsIndex
];
RealOpenMM
c6j
=
8.0
*
pow
(
atomParameters
[
jj
][
SigIndex
],
3.0
)
*
atomParameters
[
jj
][
EpsIndex
];
// For the energies and forces, we first add the regular Lorentz−Berthelot terms. The C12 term is treated as usual
// but we then subtract out (remembering that the C6 term is negative) the multiplicative C6 term that has been
// computed in real space. Finally, we add a potential shift term to account for the difference between the LB
// and multiplicative functional forms at the cutoff.
RealOpenMM
emult
=
c6i
*
c6j
*
inverseR2
*
inverseR2
*
inverseR2
*
(
1.0
-
EXP
(
-
dar2
)
*
(
1.0
+
dar2
+
0.5
*
dar4
));
dEdR
+=
6.0
*
c6i
*
c6j
*
inverseR2
*
inverseR2
*
inverseR2
*
inverseR2
*
(
1.0
-
EXP
(
-
dar2
)
*
(
1.0
+
dar2
+
0.5
*
dar4
+
dar6
/
6.0
));
RealOpenMM
inverseCut2
=
1.0
/
(
cutoffDistance
*
cutoffDistance
);
RealOpenMM
inverseCut6
=
inverseCut2
*
inverseCut2
*
inverseCut2
;
sig2
=
atomParameters
[
ii
][
SigIndex
]
+
atomParameters
[
jj
][
SigIndex
];
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
// The additive part of the potential shift
RealOpenMM
potentialshift
=
eps
*
(
one
-
sig6
*
inverseCut6
)
*
sig6
*
inverseCut6
;
dalphaR
=
alphaDispersionEwald
*
cutoffDistance
;
dar2
=
dalphaR
*
dalphaR
;
dar4
=
dar2
*
dar2
;
// The multiplicative part of the potential shift
potentialshift
-=
c6i
*
c6j
*
inverseCut6
*
(
1.0
-
EXP
(
-
dar2
)
*
(
1.0
+
dar2
+
0.5
*
dar4
));
vdwEnergy
+=
emult
+
potentialshift
;
}
if
(
useSwitch
)
{
dEdR
-=
vdwEnergy
*
switchDeriv
*
inverseR
;
vdwEnergy
*=
switchValue
;
...
...
@@ -452,6 +536,24 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
realSpaceEwaldEnergy
=
(
RealOpenMM
)
(
alphaEwald
*
TWO_OVER_SQRT_PI
*
ONE_4PI_EPS0
*
atomParameters
[
ii
][
QIndex
]
*
atomParameters
[
jj
][
QIndex
]);
}
if
(
ljpme
){
// Dispersion terms. Here we just back out the reciprocal space terms, and don't add any extra real space terms.
RealOpenMM
dalphaR
=
alphaDispersionEwald
*
r
;
RealOpenMM
inverseR2
=
inverseR
*
inverseR
;
RealOpenMM
dar2
=
dalphaR
*
dalphaR
;
RealOpenMM
dar4
=
dar2
*
dar2
;
RealOpenMM
dar6
=
dar4
*
dar2
;
RealOpenMM
c6i
=
8.0
*
pow
(
atomParameters
[
ii
][
SigIndex
],
3.0
)
*
atomParameters
[
ii
][
EpsIndex
];
RealOpenMM
c6j
=
8.0
*
pow
(
atomParameters
[
jj
][
SigIndex
],
3.0
)
*
atomParameters
[
jj
][
EpsIndex
];
realSpaceEwaldEnergy
-=
c6i
*
c6j
*
inverseR2
*
inverseR2
*
inverseR2
*
(
1.0
-
EXP
(
-
dar2
)
*
(
1.0
+
dar2
+
0.5
*
dar4
));
RealOpenMM
dEdR
=
-
6.0
*
c6i
*
c6j
*
inverseR2
*
inverseR2
*
inverseR2
*
inverseR2
*
(
1.0
-
EXP
(
-
dar2
)
*
(
1.0
+
dar2
+
0.5
*
dar4
+
dar6
/
6.0
));
for
(
int
kk
=
0
;
kk
<
3
;
kk
++
)
{
RealOpenMM
force
=
dEdR
*
deltaR
[
0
][
kk
];
forces
[
ii
][
kk
]
-=
force
;
forces
[
jj
][
kk
]
+=
force
;
}
}
totalExclusionEnergy
+=
realSpaceEwaldEnergy
;
if
(
energyByAtom
)
{
energyByAtom
[
ii
]
-=
realSpaceEwaldEnergy
;
...
...
@@ -488,7 +590,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>&
RealOpenMM
*
fixedParameters
,
vector
<
RealVec
>&
forces
,
RealOpenMM
*
energyByAtom
,
RealOpenMM
*
totalEnergy
,
bool
includeDirect
,
bool
includeReciprocal
)
const
{
if
(
ewald
||
pme
)
{
if
(
ewald
||
pme
||
ljpme
)
{
calculateEwaldIxn
(
numberOfAtoms
,
atomCoordinates
,
atomParameters
,
exclusions
,
fixedParameters
,
forces
,
energyByAtom
,
totalEnergy
,
includeDirect
,
includeReciprocal
);
return
;
...
...
@@ -512,7 +614,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>&
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms
...
...
@@ -608,5 +710,5 @@ void ReferenceLJCoulombIxn::calculateOneIxn(int ii, int jj, vector<RealVec>& ato
energyByAtom
[
ii
]
+=
energy
;
energyByAtom
[
jj
]
+=
energy
;
}
}
}
platforms/reference/src/SimTKReference/ReferencePME.cpp
View file @
53f770f4
...
...
@@ -513,6 +513,106 @@ pme_reciprocal_convolution(pme_t pme,
}
static
void
dpme_reciprocal_convolution
(
pme_t
pme
,
const
RealVec
periodicBoxVectors
[
3
],
const
RealVec
recipBoxVectors
[
3
],
RealOpenMM
*
energy
)
{
int
kx
,
ky
,
kz
;
int
nx
,
ny
,
nz
;
RealOpenMM
mx
,
my
,
mz
;
RealOpenMM
mhx
,
mhy
,
mhz
,
m2
;
RealOpenMM
bx
,
by
,
bz
;
RealOpenMM
d1
,
d2
;
RealOpenMM
eterm
,
struct2
,
ets2
;
RealOpenMM
esum
;
RealOpenMM
denom
;
RealOpenMM
boxfactor
;
RealOpenMM
maxkx
,
maxky
,
maxkz
;
t_complex
*
ptr
;
nx
=
pme
->
ngrid
[
0
];
ny
=
pme
->
ngrid
[
1
];
nz
=
pme
->
ngrid
[
2
];
boxfactor
=
(
RealOpenMM
)
M_PI
*
sqrt
(
M_PI
)
/
(
6.0
*
periodicBoxVectors
[
0
][
0
]
*
periodicBoxVectors
[
1
][
1
]
*
periodicBoxVectors
[
2
][
2
]);
esum
=
0
;
maxkx
=
(
RealOpenMM
)
((
nx
+
1
)
/
2
);
maxky
=
(
RealOpenMM
)
((
ny
+
1
)
/
2
);
maxkz
=
(
RealOpenMM
)
((
nz
+
1
)
/
2
);
RealOpenMM
bfac
=
M_PI
/
pme
->
ewaldcoeff
;
RealOpenMM
fac1
=
2.0
*
M_PI
*
M_PI
*
M_PI
*
sqrt
(
M_PI
);
RealOpenMM
fac2
=
pme
->
ewaldcoeff
*
pme
->
ewaldcoeff
*
pme
->
ewaldcoeff
;
RealOpenMM
fac3
=
-
2.0
*
pme
->
ewaldcoeff
*
M_PI
*
M_PI
;
RealOpenMM
b
,
m
,
m3
,
expfac
,
expterm
,
erfcterm
;
for
(
kx
=
0
;
kx
<
nx
;
kx
++
)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mx
=
(
RealOpenMM
)
((
kx
<
maxkx
)
?
kx
:
(
kx
-
nx
));
mhx
=
mx
*
recipBoxVectors
[
0
][
0
];
bx
=
pme
->
bsplines_moduli
[
0
][
kx
];
for
(
ky
=
0
;
ky
<
ny
;
ky
++
)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
my
=
(
RealOpenMM
)
((
ky
<
maxky
)
?
ky
:
(
ky
-
ny
));
mhy
=
mx
*
recipBoxVectors
[
1
][
0
]
+
my
*
recipBoxVectors
[
1
][
1
];
by
=
pme
->
bsplines_moduli
[
1
][
ky
];
for
(
kz
=
0
;
kz
<
nz
;
kz
++
)
{
/*
* Unlike the Coulombic case, there's an m=0 term so all terms are considered here.
*/
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mz
=
(
RealOpenMM
)
((
kz
<
maxkz
)
?
kz
:
(
kz
-
nz
));
mhz
=
mx
*
recipBoxVectors
[
2
][
0
]
+
my
*
recipBoxVectors
[
2
][
1
]
+
mz
*
recipBoxVectors
[
2
][
2
];
/* Pointer to the grid cell in question */
ptr
=
pme
->
grid
+
kx
*
ny
*
nz
+
ky
*
nz
+
kz
;
/* Get grid data for this frequency */
d1
=
ptr
->
re
;
d2
=
ptr
->
im
;
/* Calculate the convolution - see the Essman/Darden paper for the equation! */
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
bz
=
pme
->
bsplines_moduli
[
2
][
kz
];
denom
=
boxfactor
/
(
bx
*
by
*
bz
);
m
=
sqrt
(
m2
);
m3
=
m
*
m2
;
b
=
bfac
*
m
;
expfac
=
-
b
*
b
;
erfcterm
=
erfc
(
b
);
expterm
=
exp
(
expfac
);
eterm
=
(
fac1
*
erfcterm
*
m3
+
expterm
*
(
fac2
+
fac3
*
m2
))
*
denom
;
/* write back convolution data to grid */
ptr
->
re
=
d1
*
eterm
;
ptr
->
im
=
d2
*
eterm
;
struct2
=
(
d1
*
d1
+
d2
*
d2
);
/* Long-range PME contribution to the energy for this frequency */
ets2
=
eterm
*
struct2
;
esum
+=
ets2
;
}
}
}
// Remember the C6 energy is attractive, hence the negative sign.
*
energy
=
(
RealOpenMM
)
(
-
esum
);
}
static
void
pme_grid_interpolate_force
(
pme_t
pme
,
const
RealVec
recipBoxVectors
[
3
],
...
...
@@ -704,6 +804,49 @@ int pme_exec(pme_t pme,
}
int
pme_exec_dpme
(
pme_t
pme
,
const
vector
<
RealVec
>&
atomCoordinates
,
vector
<
RealVec
>&
forces
,
const
vector
<
RealOpenMM
>&
c6s
,
const
RealVec
periodicBoxVectors
[
3
],
RealOpenMM
*
energy
)
{
/* Routine is called with coordinates in x, a box, and charges in q */
RealVec
recipBoxVectors
[
3
];
invert_box_vectors
(
periodicBoxVectors
,
recipBoxVectors
);
/* Before we can do the actual interpolation, we need to recalculate and update
* the indices for each particle in the charge grid (initialized in pme_init()),
* and what its fractional offset in this grid cell is.
*/
/* Update charge grid indices and fractional offsets for each atom.
* The indices/fractions are stored internally in the pme datatype
*/
pme_update_grid_index_and_fraction
(
pme
,
atomCoordinates
,
periodicBoxVectors
,
recipBoxVectors
);
/* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
pme_update_bsplines
(
pme
);
/* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
pme_grid_spread_charge
(
pme
,
c6s
);
/* do 3d-fft */
fftpack_exec_3d
(
pme
->
fftplan
,
FFTPACK_FORWARD
,
pme
->
grid
,
pme
->
grid
);
/* solve in k-space */
dpme_reciprocal_convolution
(
pme
,
periodicBoxVectors
,
recipBoxVectors
,
energy
);
/* do 3d-invfft */
fftpack_exec_3d
(
pme
->
fftplan
,
FFTPACK_BACKWARD
,
pme
->
grid
,
pme
->
grid
);
/* Get the particle forces from the grid and bsplines in the pme structure */
pme_grid_interpolate_force
(
pme
,
recipBoxVectors
,
c6s
,
forces
);
return
0
;
}
int
pme_destroy
(
pme_t
pme
)
...
...
platforms/reference/tests/TestReferenceDispersionPME.cpp
0 → 100644
View file @
53f770f4
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "ReferenceTests.h"
#include "TestDispersionPME.h"
void
runPlatformTests
()
{
}
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
53f770f4
...
...
@@ -1155,7 +1155,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
NonbondedForce
nb
;
nb
.
setEwaldErrorTolerance
(
force
.
getEwaldErrorTolerance
());
nb
.
setCutoffDistance
(
force
.
getCutoffDistance
());
NonbondedForceImpl
::
calcPMEParameters
(
system
,
nb
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
NonbondedForceImpl
::
calcPMEParameters
(
system
,
nb
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
false
);
gridSizeX
=
CudaFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeY
=
CudaFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeZ
=
CudaFFT3D
::
findLegalDimension
(
gridSizeZ
);
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment