Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b1a22214
Commit
b1a22214
authored
Jan 26, 2010
by
Peter Eastman
Browse files
Implemented PME for OpenCL platform
parent
3a38084c
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
390 additions
and
10 deletions
+390
-10
openmmapi/src/NonbondedForceImpl.cpp
openmmapi/src/NonbondedForceImpl.cpp
+3
-0
platforms/opencl/src/OpenCLFFT3D.cpp
platforms/opencl/src/OpenCLFFT3D.cpp
+10
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+165
-2
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+23
-1
platforms/opencl/src/OpenCLSort.h
platforms/opencl/src/OpenCLSort.h
+2
-0
platforms/opencl/src/kernels/fft.cl
platforms/opencl/src/kernels/fft.cl
+1
-3
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+183
-0
platforms/opencl/tests/TestOpenCLEwald.cpp
platforms/opencl/tests/TestOpenCLEwald.cpp
+3
-3
No files found.
openmmapi/src/NonbondedForceImpl.cpp
View file @
b1a22214
...
...
@@ -143,6 +143,9 @@ void NonbondedForceImpl::calcPMEParameters(const System& system, const Nonbonded
xsize
=
(
int
)
ceil
(
alpha
*
boxVectors
[
0
][
0
]
/
pow
(
0.5
*
tol
,
0.2
));
ysize
=
(
int
)
ceil
(
alpha
*
boxVectors
[
1
][
1
]
/
pow
(
0.5
*
tol
,
0.2
));
zsize
=
(
int
)
ceil
(
alpha
*
boxVectors
[
2
][
2
]
/
pow
(
0.5
*
tol
,
0.2
));
xsize
=
max
(
xsize
,
5
);
ysize
=
max
(
ysize
,
5
);
zsize
=
max
(
zsize
,
5
);
}
int
NonbondedForceImpl
::
findZero
(
const
NonbondedForceImpl
::
ErrorFunction
&
f
,
int
initialGuess
)
{
...
...
platforms/opencl/src/OpenCLFFT3D.cpp
View file @
b1a22214
...
...
@@ -86,6 +86,7 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
if
(
unfactored
%
5
==
0
)
{
L
=
L
/
5
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 5)
\n
"
;
source
<<
"if (i < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
...
@@ -109,12 +110,14 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
source
<<
"data"
<<
output
<<
"[i+(4*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
xsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8+d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
xsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d8-d10);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(4*j+4)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
4
*
xsize
)
<<
"/"
<<
(
5
*
L
)
<<
"], d7-d9);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
5
;
unfactored
/=
5
;
}
else
if
(
unfactored
%
4
==
0
)
{
L
=
L
/
4
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 4)
\n
"
;
source
<<
"if (i < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
...
@@ -128,12 +131,14 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
source
<<
"data"
<<
output
<<
"[i+(3*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
xsize
<<
"/"
<<
(
4
*
L
)
<<
"], d1+d3);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
xsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d0-d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(3*j+3)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
3
*
xsize
)
<<
"/"
<<
(
4
*
L
)
<<
"], d1-d3);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
4
;
unfactored
/=
4
;
}
else
if
(
unfactored
%
3
==
0
)
{
L
=
L
/
3
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 3)
\n
"
;
source
<<
"if (i < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
...
...
@@ -144,17 +149,20 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
source
<<
"data"
<<
output
<<
"[i+2*j*"
<<
m
<<
"] = c0+d0;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
xsize
<<
"/"
<<
(
3
*
L
)
<<
"], d1+d2);
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(2*j+2)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
(
2
*
xsize
)
<<
"/"
<<
(
3
*
L
)
<<
"], d1-d2);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
3
;
unfactored
/=
3
;
}
else
if
(
unfactored
%
2
==
0
)
{
L
=
L
/
2
;
source
<<
"// Pass "
<<
(
stage
+
1
)
<<
" (radix 2)
\n
"
;
source
<<
"if (i < "
<<
(
L
*
m
)
<<
") {
\n
"
;
source
<<
"int j = i/"
<<
m
<<
";
\n
"
;
source
<<
"float2 c0 = data"
<<
input
<<
"[i];
\n
"
;
source
<<
"float2 c1 = data"
<<
input
<<
"[i+"
<<
(
L
*
m
)
<<
"];
\n
"
;
source
<<
"data"
<<
output
<<
"[i+j*"
<<
m
<<
"] = c0+c1;
\n
"
;
source
<<
"data"
<<
output
<<
"[i+(j+1)*"
<<
m
<<
"] = multiplyComplex(w[j*"
<<
xsize
<<
"/"
<<
(
2
*
L
)
<<
"], c0-c1);
\n
"
;
source
<<
"}
\n
"
;
m
=
m
*
2
;
unfactored
/=
2
;
}
...
...
@@ -167,7 +175,8 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int xmult,
// Create the kernel.
source
<<
"matrix[element] = data"
<<
(
stage
%
2
)
<<
"[i];"
;
source
<<
"matrix[element] = data"
<<
(
stage
%
2
)
<<
"[i];
\n
"
;
source
<<
"barrier(CLK_GLOBAL_MEM_FENCE);"
;
map
<
string
,
string
>
replacements
;
replacements
[
"XSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
xsize
);
replacements
[
"YSIZE"
]
=
OpenCLExpressionUtilities
::
intToString
(
ysize
);
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
b1a22214
...
...
@@ -691,6 +691,28 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete
exceptionParams
;
if
(
exceptionIndices
!=
NULL
)
delete
exceptionIndices
;
if
(
cosSinSums
!=
NULL
)
delete
cosSinSums
;
if
(
pmeGrid
!=
NULL
)
delete
pmeGrid
;
if
(
pmeBsplineModuliX
!=
NULL
)
delete
pmeBsplineModuliX
;
if
(
pmeBsplineModuliY
!=
NULL
)
delete
pmeBsplineModuliY
;
if
(
pmeBsplineModuliZ
!=
NULL
)
delete
pmeBsplineModuliZ
;
if
(
pmeBsplineTheta
!=
NULL
)
delete
pmeBsplineTheta
;
if
(
pmeBsplineDtheta
!=
NULL
)
delete
pmeBsplineDtheta
;
if
(
pmeAtomRange
!=
NULL
)
delete
pmeAtomRange
;
if
(
pmeAtomGridIndex
!=
NULL
)
delete
pmeAtomGridIndex
;
if
(
sort
!=
NULL
)
delete
sort
;
if
(
fft
!=
NULL
)
delete
fft
;
}
void
OpenCLCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
...
@@ -780,8 +802,101 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
ewaldForcesKernel
=
cl
::
Kernel
(
program
,
"calculateEwaldForces"
);
cosSinSums
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
"cosSinSums"
);
}
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
throw
OpenMMException
(
"OpenMMPlatform does not yet support PME"
);
else
if
(
force
.
getNonbondedMethod
()
==
NonbondedForce
::
PME
)
{
// Compute the PME parameters.
double
alpha
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
gridSizeX
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeY
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeY
);
gridSizeZ
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeZ
);
defines
[
"EWALD_ALPHA"
]
=
doubleToString
(
alpha
);
defines
[
"TWO_OVER_SQRT_PI"
]
=
doubleToString
(
2.0
/
sqrt
(
M_PI
));
defines
[
"USE_EWALD"
]
=
"1"
;
double
selfEnergyScale
=
ONE_4PI_EPS0
*
alpha
/
std
::
sqrt
(
M_PI
);
ewaldSelfEnergy
=
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
std
::
sqrt
(
M_PI
);
pmeDefines
[
"PME_ORDER"
]
=
intToString
(
PmeOrder
);
pmeDefines
[
"NUM_ATOMS"
]
=
intToString
(
numParticles
);
pmeDefines
[
"RECIP_EXP_FACTOR"
]
=
doubleToString
(
M_PI
*
M_PI
/
(
alpha
*
alpha
));
pmeDefines
[
"GRID_SIZE_X"
]
=
intToString
(
gridSizeX
);
pmeDefines
[
"GRID_SIZE_Y"
]
=
intToString
(
gridSizeY
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
intToString
(
gridSizeZ
);
pmeDefines
[
"EPSILON_FACTOR"
]
=
doubleToString
(
std
::
sqrt
(
ONE_4PI_EPS0
));
// Create required data structures.
pmeGrid
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
"pmeGrid"
);
pmeBsplineModuliX
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeX
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeY
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
OpenCLArray
<
cl_float
>
(
cl
,
gridSizeZ
,
"pmeBsplineModuliZ"
);
pmeBsplineTheta
=
new
OpenCLArray
<
mm_float4
>
(
cl
,
PmeOrder
*
numParticles
,
"pmeBsplineTheta"
);
pmeBsplineDtheta
=
new
OpenCLArray
<
mm_float4
>
(
cl
,
PmeOrder
*
numParticles
,
"pmeBsplineDtheta"
);
pmeAtomRange
=
new
OpenCLArray
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
<
mm_float2
>
(
cl
,
cl
.
getNumAtoms
(),
"float2"
,
"value.y"
);
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
// Initialize the b-spline moduli.
int
maxSize
=
max
(
max
(
gridSizeX
,
gridSizeY
),
gridSizeZ
);
vector
<
double
>
data
(
PmeOrder
);
vector
<
double
>
ddata
(
PmeOrder
);
vector
<
double
>
bsplines_data
(
maxSize
);
data
[
PmeOrder
-
1
]
=
0.0
;
data
[
1
]
=
0.0
;
data
[
0
]
=
1.0
;
for
(
int
i
=
3
;
i
<
PmeOrder
;
i
++
)
{
double
div
=
1.0
/
(
i
-
1.0
);
data
[
i
-
1
]
=
0.0
;
for
(
int
j
=
1
;
j
<
(
i
-
1
);
j
++
)
data
[
i
-
j
-
1
]
=
div
*
(
j
*
data
[
i
-
j
-
2
]
+
(
i
-
j
)
*
data
[
i
-
j
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
}
// Differentiate.
ddata
[
0
]
=
-
data
[
0
];
for
(
int
i
=
1
;
i
<
PmeOrder
;
i
++
)
ddata
[
i
]
=
data
[
i
-
1
]
-
data
[
i
];
double
div
=
1.0
/
(
PmeOrder
-
1
);
data
[
PmeOrder
-
1
]
=
0.0
;
for
(
int
i
=
1
;
i
<
(
PmeOrder
-
1
);
i
++
)
data
[
PmeOrder
-
i
-
1
]
=
div
*
(
i
*
data
[
PmeOrder
-
i
-
2
]
+
(
PmeOrder
-
i
)
*
data
[
PmeOrder
-
i
-
1
]);
data
[
0
]
=
div
*
data
[
0
];
for
(
int
i
=
0
;
i
<
maxSize
;
i
++
)
bsplines_data
[
i
]
=
0.0
;
for
(
int
i
=
1
;
i
<=
PmeOrder
;
i
++
)
bsplines_data
[
i
]
=
data
[
i
-
1
];
// Evaluate the actual bspline moduli for X/Y/Z.
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
int
ndata
=
(
dim
==
0
?
gridSizeX
:
dim
==
1
?
gridSizeY
:
gridSizeZ
);
vector
<
cl_float
>
moduli
(
ndata
);
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
double
sc
=
0.0
;
double
ss
=
0.0
;
for
(
int
j
=
0
;
j
<
ndata
;
j
++
)
{
double
arg
=
(
2.0
*
M_PI
*
i
*
j
)
/
ndata
;
sc
+=
bsplines_data
[
j
]
*
cos
(
arg
);
ss
+=
bsplines_data
[
j
]
*
sin
(
arg
);
}
moduli
[
i
]
=
(
float
)
(
sc
*
sc
+
ss
*
ss
);
}
for
(
int
i
=
0
;
i
<
ndata
;
i
++
)
{
if
(
moduli
[
i
]
<
1.0e-7
)
moduli
[
i
]
=
(
moduli
[
i
-
1
]
+
moduli
[
i
+
1
])
*
0.5
f
;
}
if
(
dim
==
0
)
pmeBsplineModuliX
->
upload
(
moduli
);
else
if
(
dim
==
1
)
pmeBsplineModuliY
->
upload
(
moduli
);
else
pmeBsplineModuliZ
->
upload
(
moduli
);
}
}
else
ewaldSelfEnergy
=
0.0
;
...
...
@@ -848,6 +963,43 @@ void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
ewaldForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getPosq
().
getDeviceBuffer
());
ewaldForcesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cosSinSums
->
getDeviceBuffer
());
}
if
(
pmeGrid
!=
NULL
)
{
mm_float4
boxSize
=
cl
.
getNonbondedUtilities
().
getPeriodicBoxSize
();
pmeDefines
[
"PERIODIC_BOX_SIZE_X"
]
=
doubleToString
(
boxSize
.
x
);
pmeDefines
[
"PERIODIC_BOX_SIZE_Y"
]
=
doubleToString
(
boxSize
.
y
);
pmeDefines
[
"PERIODIC_BOX_SIZE_Z"
]
=
doubleToString
(
boxSize
.
z
);
pmeDefines
[
"RECIP_SCALE_FACTOR"
]
=
doubleToString
(
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
));
cl
::
Program
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
pme
,
pmeDefines
);
pmeGridIndexKernel
=
cl
::
Kernel
(
program
,
"updateGridIndexAndFraction"
);
pmeAtomRangeKernel
=
cl
::
Kernel
(
program
,
"findAtomRangeForGrid"
);
pmeUpdateBsplinesKernel
=
cl
::
Kernel
(
program
,
"updateBsplines"
);
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cl
::
Kernel
(
program
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cl
::
Kernel
(
program
,
"gridInterpolateForce"
);
pmeGridIndexKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeGridIndexKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeAtomRange
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeBsplineTheta
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineDtheta
->
getDeviceBuffer
());
pmeUpdateBsplinesKernel
.
setArg
(
3
,
2
*
OpenCLContext
::
ThreadBlockSize
*
PmeOrder
*
sizeof
(
mm_float4
),
NULL
);
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeAtomRange
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeGrid
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineTheta
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliX
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineTheta
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineDtheta
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeGrid
->
getDeviceBuffer
());
}
}
if
(
exceptionIndices
!=
NULL
)
cl
.
executeKernel
(
exceptionsKernel
,
exceptionIndices
->
getSize
());
...
...
@@ -855,6 +1007,17 @@ void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
cl
.
executeKernel
(
ewaldSumsKernel
,
cosSinSums
->
getSize
());
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
}
if
(
pmeGrid
!=
NULL
)
{
cl
.
executeKernel
(
pmeGridIndexKernel
,
cl
.
getNumAtoms
());
sort
->
sort
(
*
pmeAtomGridIndex
);
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
fft
->
execFFT
(
*
pmeGrid
,
true
);
cl
.
executeKernel
(
pmeConvolutionKernel
,
cl
.
getNumAtoms
());
fft
->
execFFT
(
*
pmeGrid
,
false
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
());
}
}
double
OpenCLCalcNonbondedForceKernel
::
executeEnergy
(
ContextImpl
&
context
)
{
...
...
platforms/opencl/src/OpenCLKernels.h
View file @
b1a22214
...
...
@@ -30,7 +30,9 @@
#include "OpenCLPlatform.h"
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include "OpenCLFFT3D.h"
#include "OpenCLParameterSet.h"
#include "OpenCLSort.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
...
...
@@ -350,7 +352,9 @@ private:
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
exceptionIndices
(
NULL
),
cosSinSums
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
exceptionIndices
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeBsplineDtheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
)
{
}
~
OpenCLCalcNonbondedForceKernel
();
/**
...
...
@@ -380,10 +384,28 @@ private:
OpenCLArray
<
mm_float4
>*
exceptionParams
;
OpenCLArray
<
mm_int4
>*
exceptionIndices
;
OpenCLArray
<
mm_float2
>*
cosSinSums
;
OpenCLArray
<
mm_float2
>*
pmeGrid
;
OpenCLArray
<
cl_float
>*
pmeBsplineModuliX
;
OpenCLArray
<
cl_float
>*
pmeBsplineModuliY
;
OpenCLArray
<
cl_float
>*
pmeBsplineModuliZ
;
OpenCLArray
<
mm_float4
>*
pmeBsplineTheta
;
OpenCLArray
<
mm_float4
>*
pmeBsplineDtheta
;
OpenCLArray
<
cl_int
>*
pmeAtomRange
;
OpenCLArray
<
mm_float2
>*
pmeAtomGridIndex
;
OpenCLSort
<
mm_float2
>*
sort
;
OpenCLFFT3D
*
fft
;
cl
::
Kernel
exceptionsKernel
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
double
cutoffSquared
,
ewaldSelfEnergy
;
static
const
int
PmeOrder
=
5
;
};
/**
...
...
platforms/opencl/src/OpenCLSort.h
View file @
b1a22214
...
...
@@ -92,6 +92,8 @@ public:
sortKernelSize
=
maxLocalBuffer
;
int
targetBucketSize
=
sortKernelSize
/
2
;
int
numBuckets
=
length
/
targetBucketSize
;
if
(
numBuckets
<
1
)
numBuckets
=
1
;
if
(
positionsKernelSize
>
numBuckets
)
positionsKernelSize
=
numBuckets
;
...
...
platforms/opencl/src/kernels/fft.cl
View file @
b1a22214
...
...
@@ -9,14 +9,12 @@ float2 multiplyComplex(float2 c1, float2 c2) {
__kernel
void
execFFT
(
__global
float2*
matrix,
float
sign,
__local
float2*
w,
__local
float2*
data0,
__local
float2*
data1
)
{
const
int
i
=
get_local_id
(
0
)
;
w[i]
=
(
float2
)
(
cos
(
-sign*i*2*M_PI/XSIZE
)
,
sin
(
-sign*i*2*M_PI/XSIZE
))
;
int
index
=
get_group_id
(
0
)
;
while
(
index
<
YSIZE*ZSIZE
)
{
for
(
int
index
=
get_group_id
(
0
)
; index < YSIZE*ZSIZE; index += get_num_groups(0)) {
int
z
=
index/YSIZE
;
int
y
=
index-z*YSIZE
;
int
element
=
i*XMULT+y*YMULT+z*ZMULT
;
data0[i]
=
matrix[element]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
COMPUTE_FFT
index
+=
get_num_groups
(
0
)
;
}
}
platforms/opencl/src/kernels/pme.cl
0 → 100644
View file @
b1a22214
__kernel
void
updateGridIndexAndFraction
(
__global
float4*
posq,
__global
float2*
pmeAtomGridIndex
)
{
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
float4
pos
=
posq[i]
;
float4
t
=
(
float4
)
((
pos.x/PERIODIC_BOX_SIZE_X+1.0f
)
*GRID_SIZE_X,
(
pos.y/PERIODIC_BOX_SIZE_Y+1.0f
)
*GRID_SIZE_Y,
(
pos.z/PERIODIC_BOX_SIZE_Z+1.0f
)
*GRID_SIZE_Z,
0.0f
)
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
pmeAtomGridIndex[i]
=
(
float2
)
(
i,
gridIndex.x*GRID_SIZE_Y*GRID_SIZE_Z+gridIndex.y*GRID_SIZE_Z+gridIndex.z
)
;
}
}
/**
*
For
each
grid
point,
find
the
range
of
sorted
atoms
associated
with
that
point.
*/
__kernel
void
findAtomRangeForGrid
(
__global
float4*
posq,
__global
float2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange
)
{
int
start
=
(
NUM_ATOMS*get_global_id
(
0
))
/get_global_size
(
0
)
;
int
end
=
(
NUM_ATOMS*
(
get_global_id
(
0
)
+1
))
/get_global_size
(
0
)
;
int
last
=
(
start
==
0
?
-1
:
pmeAtomGridIndex[start-1].y
)
;
for
(
int
i
=
start
; i < end; ++i) {
float2
atomData
=
pmeAtomGridIndex[i]
;
int
gridIndex
=
atomData.y
;
if
(
gridIndex
!=
last
)
{
for
(
int
j
=
last+1
; j <= gridIndex; ++j)
pmeAtomRange[j]
=
i
;
last
=
gridIndex
;
}
//
The
grid
index
won
't
be
needed
again.
Reuse
that
component
to
hold
the
atom
charge,
thus
saving
//
an
extra
load
operation
in
the
charge
spreading
kernel.
pmeAtomGridIndex[i].y
=
posq[
(
int
)
atomData.x].w
;
}
//
Fill
in
values
beyond
the
last
atom.
if
(
get_global_id
(
0
)
==
get_global_size
(
0
)
-1
)
{
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
for
(
int
j
=
last+1
; j <= gridSize; ++j)
pmeAtomRange[j]
=
NUM_ATOMS
;
}
}
__kernel
void
updateBsplines
(
__global
float4*
posq,
__global
float4*
pmeBsplineTheta,
__global
float4*
pmeBsplineDTheta,
__local
float4*
bsplinesCache
)
{
const
float4
scale
=
1.0f/
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
float4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
__local
float4*
ddata
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER
+
get_local_size
(
0
)
*PME_ORDER]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j]
=
0.0f
;
ddata[j]
=
0.0f
;
}
float4
pos
=
posq[i]
;
float4
t
=
(
float4
)
((
pos.x/PERIODIC_BOX_SIZE_X+1.0f
)
*GRID_SIZE_X,
(
pos.y/PERIODIC_BOX_SIZE_Y+1.0f
)
*GRID_SIZE_Y,
(
pos.z/PERIODIC_BOX_SIZE_Z+1.0f
)
*GRID_SIZE_Z,
0.0f
)
;
float4
dr
=
(
float4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0.0f
)
;
data[PME_ORDER-1]
=
0.0f
;
data[1]
=
dr
;
data[0]
=
1.0f-dr
;
for
(
int
j
=
3
; j < PME_ORDER; j++) {
float
div
=
1.0f/
(
j-1.0f
)
;
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
float4
)
k
)
*data[j-k-2]
+
(
-dr+
(
float4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
}
ddata[0]
=
-data[0]
;
for
(
int
j
=
1
; j < PME_ORDER; j++)
ddata[j]
=
data[j-1]-data[j]
;
data[PME_ORDER-1]
=
scale*dr*data[PME_ORDER-2]
;
for
(
int
j
=
1
; j < (PME_ORDER-1); j++)
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
float4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
float4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
pmeBsplineDTheta[i+j*NUM_ATOMS]
=
ddata[j]
;
}
}
}
__kernel
void
gridSpreadCharge
(
__global
float2*
pmeAtomGridIndex,
__global
int*
pmeAtomRange,
__global
float2*
pmeGrid,
__global
float4*
pmeBsplineTheta
)
{
unsigned
int
numGridPoints
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
for
(
int
gridIndex
=
get_global_id
(
0
)
; gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
int3
gridPoint
;
gridPoint.x
=
gridIndex/
(
GRID_SIZE_Y*GRID_SIZE_Z
)
;
int
remainder
=
gridIndex-gridPoint.x*GRID_SIZE_Y*GRID_SIZE_Z
;
gridPoint.y
=
remainder/GRID_SIZE_Z
;
gridPoint.z
=
remainder-gridPoint.y*GRID_SIZE_Z
;
gridPoint.x
+=
GRID_SIZE_X
;
gridPoint.y
+=
GRID_SIZE_Y
;
gridPoint.z
+=
GRID_SIZE_Z
;
float
result
=
0.0f
;
for
(
int
ix
=
0
; ix < PME_ORDER; ++ix)
for
(
int
iy
=
0
; iy < PME_ORDER; ++iy)
for
(
int
iz
=
0
; iz < PME_ORDER; ++iz) {
int
x
=
(
gridPoint.x-ix
)
%GRID_SIZE_X
;
int
y
=
(
gridPoint.y-iy
)
%GRID_SIZE_Y
;
int
z
=
(
gridPoint.z-iz
)
%GRID_SIZE_Z
;
int
gridIndex
=
x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z
;
int
firstAtom
=
pmeAtomRange[gridIndex]
;
int
lastAtom
=
pmeAtomRange[gridIndex+1]
;
for
(
int
i
=
firstAtom
; i < lastAtom; ++i) {
float2
atomData
=
pmeAtomGridIndex[i]
;
int
atomIndex
=
atomData.x
;
float
atomCharge
=
atomData.y
;
result
+=
atomCharge*pmeBsplineTheta[atomIndex+ix*NUM_ATOMS].x*pmeBsplineTheta[atomIndex+iy*NUM_ATOMS].y*pmeBsplineTheta[atomIndex+iz*NUM_ATOMS].z
;
}
}
pmeGrid[gridIndex]
=
(
float2
)
(
result*EPSILON_FACTOR,
0.0f
)
;
}
}
__kernel
void
reciprocalConvolution
(
__global
float2*
pmeGrid,
__global
float*
energyBuffer,
__global
float*
pmeBsplineModuliX,
__global
float*
pmeBsplineModuliY,
__global
float*
pmeBsplineModuliZ
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
float
energy
=
0.0f
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
int
kx
=
index/
(
GRID_SIZE_Y*GRID_SIZE_Z
)
;
int
remainder
=
index-kx*GRID_SIZE_Y*GRID_SIZE_Z
;
int
ky
=
remainder/GRID_SIZE_Z
;
int
kz
=
remainder-ky*GRID_SIZE_Z
;
if
(
kx
==
0
&&
ky
==
0
&&
kz
==
0
)
continue
;
int
mx
=
(
kx
<
(
GRID_SIZE_X+1
)
/2
)
?
kx
:
(
kx-GRID_SIZE_X
)
;
int
my
=
(
ky
<
(
GRID_SIZE_Y+1
)
/2
)
?
ky
:
(
ky-GRID_SIZE_Y
)
;
int
mz
=
(
kz
<
(
GRID_SIZE_Z+1
)
/2
)
?
kz
:
(
kz-GRID_SIZE_Z
)
;
float
mhx
=
mx/PERIODIC_BOX_SIZE_X
;
float
mhy
=
my/PERIODIC_BOX_SIZE_Y
;
float
mhz
=
mz/PERIODIC_BOX_SIZE_Z
;
float
bx
=
pmeBsplineModuliX[kx]
;
float
by
=
pmeBsplineModuliY[ky]
;
float
bz
=
pmeBsplineModuliZ[kz]
;
float2
grid
=
pmeGrid[index]
;
float
m2
=
mhx*mhx+mhy*mhy+mhz*mhz
;
float
denom
=
m2*bx*by*bz
;
float
eterm
=
RECIP_SCALE_FACTOR*exp
(
-RECIP_EXP_FACTOR*m2
)
/denom
;
pmeGrid[index]
=
(
float2
)
(
grid.x*eterm,
grid.y*eterm
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
}
__kernel
void
gridInterpolateForce
(
__global
float4*
posq,
__global
float4*
forceBuffers,
__global
float4*
pmeBsplineTheta,
__global
float4*
pmeBsplineDTheta,
__global
float2*
pmeGrid
)
{
for
(
int
atom
=
get_global_id
(
0
)
; atom < NUM_ATOMS; atom += get_global_size(0)) {
float3
force
=
0.0f
;
float4
pos
=
posq[atom]
;
float4
t
=
(
float4
)
((
pos.x/PERIODIC_BOX_SIZE_X+1.0f
)
*GRID_SIZE_X,
(
pos.y/PERIODIC_BOX_SIZE_Y+1.0f
)
*GRID_SIZE_Y,
(
pos.z/PERIODIC_BOX_SIZE_Z+1.0f
)
*GRID_SIZE_Z,
0.0f
)
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
(
gridIndex.x
+
ix
)
%
GRID_SIZE_X
;
float
tx
=
pmeBsplineTheta[atom+ix*NUM_ATOMS].x
;
float
dtx
=
pmeBsplineDTheta[atom+ix*NUM_ATOMS].x
;
for
(
int
iy
=
0
; iy < PME_ORDER; iy++) {
int
yindex
=
(
gridIndex.y
+
iy
)
%
GRID_SIZE_Y
;
float
ty
=
pmeBsplineTheta[atom+iy*NUM_ATOMS].y
;
float
dty
=
pmeBsplineDTheta[atom+iy*NUM_ATOMS].y
;
for
(
int
iz
=
0
; iz < PME_ORDER; iz++) {
int
zindex
=
(
gridIndex.z
+
iz
)
%
GRID_SIZE_Z
;
float
tz
=
pmeBsplineTheta[atom+iz*NUM_ATOMS].z
;
float
dtz
=
pmeBsplineDTheta[atom+iz*NUM_ATOMS].z
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
float
gridvalue
=
pmeGrid[index].x
;
force.x
+=
dtx*ty*tz*gridvalue
;
force.y
+=
tx*dty*tz*gridvalue
;
force.z
+=
tx*ty*dtz*gridvalue
;
}
}
}
float4
totalForce
=
forceBuffers[atom]
;
float
q
=
pos.w*EPSILON_FACTOR
;
totalForce.x
-=
q*force.x*GRID_SIZE_X/PERIODIC_BOX_SIZE_X
;
totalForce.y
-=
q*force.y*GRID_SIZE_Y/PERIODIC_BOX_SIZE_Y
;
totalForce.z
-=
q*force.z*GRID_SIZE_Z/PERIODIC_BOX_SIZE_Z
;
forceBuffers[atom]
=
totalForce
;
}
}
platforms/opencl/tests/TestOpenCLEwald.cpp
View file @
b1a22214
...
...
@@ -123,7 +123,7 @@ void testEwaldPME() {
ASSERT_EQUAL_TOL
(
norm
,
(
clState2
.
getPotentialEnergy
()
-
clState
.
getPotentialEnergy
())
/
delta
,
tol
)
// (3) Check whether the Reference and OpenCL platforms agree when using PME
/*
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
clContext
.
reinitialize
();
referenceContext
.
reinitialize
();
...
...
@@ -158,7 +158,7 @@ void testEwaldPME() {
tol
=
1e-3
;
State
clState3
=
clContext3
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL(norm, (clState3.getPotentialEnergy()-clState.getPotentialEnergy())/delta, tol)
*/
ASSERT_EQUAL_TOL
(
norm
,
(
clState3
.
getPotentialEnergy
()
-
clState
.
getPotentialEnergy
())
/
delta
,
tol
)
}
void
testEwald2Ions
()
{
...
...
@@ -244,7 +244,7 @@ int main() {
testEwaldPME
();
// testEwald2Ions();
testErrorTolerance
(
NonbondedForce
::
Ewald
);
//
testErrorTolerance(NonbondedForce::PME);
testErrorTolerance
(
NonbondedForce
::
PME
);
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment