Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
295ae001
Commit
295ae001
authored
Jan 07, 2015
by
Peter Eastman
Browse files
OpenCL supports PME with triclinic boxes
parent
9bcba51e
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
177 additions
and
59 deletions
+177
-59
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+74
-20
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+51
-39
platforms/opencl/tests/TestOpenCLEwald.cpp
platforms/opencl/tests/TestOpenCLEwald.cpp
+52
-0
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
295ae001
...
@@ -70,13 +70,6 @@ static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int ind
...
@@ -70,13 +70,6 @@ static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int ind
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxSize
());
}
}
static
void
setInvPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getInvPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getInvPeriodicBoxSize
());
}
static
void
setPeriodicBoxArgs
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
static
void
setPeriodicBoxArgs
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
{
if
(
cl
.
getUseDoublePrecision
())
{
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxSizeDouble
());
...
@@ -1786,7 +1779,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1786,7 +1779,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeGrid
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeGrid
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeInterpolateForceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
pmeAtomGridIndex
->
getDeviceBuffer
());
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
pmeFinishSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"finishSpreadCharge"
);
pmeFinishSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"finishSpreadCharge"
);
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
->
getDeviceBuffer
());
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid
->
getDeviceBuffer
());
...
@@ -1815,44 +1808,105 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1815,44 +1808,105 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
usePmeQueue
)
if
(
usePmeQueue
)
cl
.
setQueue
(
pmeQueue
);
cl
.
setQueue
(
pmeQueue
);
// Invert the periodic box vectors.
Vec3
boxVectors
[
3
];
cl
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
double
determinant
=
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
];
double
scale
=
1.0
/
determinant
;
mm_double4
recipBoxVectors
[
3
];
recipBoxVectors
[
0
]
=
mm_double4
(
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
]
*
scale
,
0
,
0
,
0
);
recipBoxVectors
[
1
]
=
mm_double4
(
-
boxVectors
[
1
][
0
]
*
boxVectors
[
2
][
2
]
*
scale
,
boxVectors
[
0
][
0
]
*
boxVectors
[
2
][
2
]
*
scale
,
0
,
0
);
recipBoxVectors
[
2
]
=
mm_double4
((
boxVectors
[
1
][
0
]
*
boxVectors
[
2
][
1
]
-
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
0
])
*
scale
,
-
boxVectors
[
0
][
0
]
*
boxVectors
[
2
][
1
]
*
scale
,
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
scale
,
0
);
mm_float4
recipBoxVectorsFloat
[
3
];
for
(
int
i
=
0
;
i
<
3
;
i
++
)
recipBoxVectorsFloat
[
i
]
=
mm_float4
((
float
)
recipBoxVectors
[
i
].
x
,
(
float
)
recipBoxVectors
[
i
].
y
,
(
float
)
recipBoxVectors
[
i
].
z
,
0
);
// Execute the reciprocal space kernels.
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
if
(
cl
.
getUseDoublePrecision
())
{
pmeUpdateBsplinesKernel
.
setArg
<
mm_double4
>
(
5
,
recipBoxVectors
[
0
]);
pmeUpdateBsplinesKernel
.
setArg
<
mm_double4
>
(
6
,
recipBoxVectors
[
1
]);
pmeUpdateBsplinesKernel
.
setArg
<
mm_double4
>
(
7
,
recipBoxVectors
[
2
]);
}
else
{
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
5
,
recipBoxVectorsFloat
[
0
]);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
6
,
recipBoxVectorsFloat
[
1
]);
pmeUpdateBsplinesKernel
.
setArg
<
mm_float4
>
(
7
,
recipBoxVectorsFloat
[
2
]);
}
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
if
(
deviceIsCpu
)
{
if
(
deviceIsCpu
)
{
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
6
);
if
(
cl
.
getUseDoublePrecision
())
{
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
6
,
recipBoxVectors
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
7
,
recipBoxVectors
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
8
,
recipBoxVectors
[
2
]);
}
else
{
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
recipBoxVectorsFloat
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
7
,
recipBoxVectorsFloat
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
8
,
recipBoxVectorsFloat
[
2
]);
}
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
}
}
else
{
else
{
sort
->
sort
(
*
pmeAtomGridIndex
);
sort
->
sort
(
*
pmeAtomGridIndex
);
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
if
(
cl
.
getSupports64BitGlobalAtomics
())
{
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeSpreadChargeKernel
,
6
);
if
(
cl
.
getUseDoublePrecision
())
{
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
6
,
recipBoxVectors
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
7
,
recipBoxVectors
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
8
,
recipBoxVectors
[
2
]);
}
else
{
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
recipBoxVectorsFloat
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
7
,
recipBoxVectorsFloat
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
8
,
recipBoxVectorsFloat
[
2
]);
}
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
pmeGrid
->
getSize
());
cl
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
pmeGrid
->
getSize
());
}
}
else
{
else
{
setPeriodicBoxSizeArg
(
cl
,
pmeAtomRangeKernel
,
3
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeAtomRangeKernel
,
4
);
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeAtomRangeKernel
,
cl
.
getNumAtoms
());
setPeriodicBoxSizeArg
(
cl
,
pmeZIndexKernel
,
2
);
setPeriodicBoxSizeArg
(
cl
,
pmeZIndexKernel
,
2
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeZIndexKernel
,
3
);
if
(
cl
.
getUseDoublePrecision
())
pmeZIndexKernel
.
setArg
<
mm_double4
>
(
3
,
recipBoxVectors
[
2
]);
else
pmeZIndexKernel
.
setArg
<
mm_float4
>
(
3
,
recipBoxVectorsFloat
[
2
]);
cl
.
executeKernel
(
pmeZIndexKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeZIndexKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
}
}
}
}
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeConvolutionKernel
,
5
);
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
double
scaleFactor
=
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
double
scaleFactor
=
1.0
/
(
M_PI
*
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
if
(
cl
.
getUseDoublePrecision
())
if
(
cl
.
getUseDoublePrecision
())
{
pmeConvolutionKernel
.
setArg
<
cl_double
>
(
6
,
scaleFactor
);
pmeConvolutionKernel
.
setArg
<
mm_double4
>
(
5
,
recipBoxVectors
[
0
]);
else
pmeConvolutionKernel
.
setArg
<
mm_double4
>
(
6
,
recipBoxVectors
[
1
]);
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
6
,
(
float
)
scaleFactor
);
pmeConvolutionKernel
.
setArg
<
mm_double4
>
(
7
,
recipBoxVectors
[
2
]);
pmeConvolutionKernel
.
setArg
<
cl_double
>
(
8
,
scaleFactor
);
}
else
{
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
recipBoxVectorsFloat
[
0
]);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
6
,
recipBoxVectorsFloat
[
1
]);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
7
,
recipBoxVectorsFloat
[
2
]);
pmeConvolutionKernel
.
setArg
<
cl_float
>
(
8
,
(
float
)
scaleFactor
);
}
cl
.
executeKernel
(
pmeConvolutionKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeConvolutionKernel
,
cl
.
getNumAtoms
());
fft
->
execFFT
(
*
pmeGrid2
,
*
pmeGrid
,
false
);
fft
->
execFFT
(
*
pmeGrid2
,
*
pmeGrid
,
false
);
setPeriodicBoxSizeArg
(
cl
,
pmeInterpolateForceKernel
,
3
);
setPeriodicBoxSizeArg
(
cl
,
pmeInterpolateForceKernel
,
3
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeInterpolateForceKernel
,
4
);
if
(
cl
.
getUseDoublePrecision
())
{
pmeInterpolateForceKernel
.
setArg
<
mm_double4
>
(
4
,
recipBoxVectors
[
0
]);
pmeInterpolateForceKernel
.
setArg
<
mm_double4
>
(
5
,
recipBoxVectors
[
1
]);
pmeInterpolateForceKernel
.
setArg
<
mm_double4
>
(
6
,
recipBoxVectors
[
2
]);
}
else
{
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
4
,
recipBoxVectorsFloat
[
0
]);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
5
,
recipBoxVectorsFloat
[
1
]);
pmeInterpolateForceKernel
.
setArg
<
mm_float4
>
(
6
,
recipBoxVectorsFloat
[
2
]);
}
if
(
deviceIsCpu
)
if
(
deviceIsCpu
)
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
else
else
...
...
platforms/opencl/src/kernels/pme.cl
View file @
295ae001
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
real4
pos
=
posq[i]
;
real4
pos
=
posq[i]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
real4
t
=
(
real4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0.0f
)
;
pos.z*recipBoxVecZ.z
)
;
t.x
=
(
t.x-floor
(
t.x
))
*GRID_SIZE_X
;
t.y
=
(
t.y-floor
(
t.y
))
*GRID_SIZE_Y
;
t.z
=
(
t.z-floor
(
t.z
))
*GRID_SIZE_Z
;
real4
dr
=
(
real4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0.0f
)
;
real4
dr
=
(
real4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0.0f
)
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.y
)
%
GRID_SIZE_Y,
...
@@ -41,7 +44,7 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
...
@@ -41,7 +44,7 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
/**
/**
*
For
each
grid
point,
find
the
range
of
sorted
atoms
associated
with
that
point.
*
For
each
grid
point,
find
the
range
of
sorted
atoms
associated
with
that
point.
*/
*/
__kernel
void
findAtomRangeForGrid
(
__global
int2*
restrict
pmeAtomGridIndex,
__global
int*
restrict
pmeAtomRange,
__global
const
real4*
restrict
posq
,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
__kernel
void
findAtomRangeForGrid
(
__global
int2*
restrict
pmeAtomGridIndex,
__global
int*
restrict
pmeAtomRange,
__global
const
real4*
restrict
posq
)
{
int
start
=
(
NUM_ATOMS*get_global_id
(
0
))
/get_global_size
(
0
)
;
int
start
=
(
NUM_ATOMS*get_global_id
(
0
))
/get_global_size
(
0
)
;
int
end
=
(
NUM_ATOMS*
(
get_global_id
(
0
)
+1
))
/get_global_size
(
0
)
;
int
end
=
(
NUM_ATOMS*
(
get_global_id
(
0
)
+1
))
/get_global_size
(
0
)
;
int
last
=
(
start
==
0
?
-1
:
pmeAtomGridIndex[start-1].y
)
;
int
last
=
(
start
==
0
?
-1
:
pmeAtomGridIndex[start-1].y
)
;
...
@@ -68,13 +71,13 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
...
@@ -68,13 +71,13 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
*
The
grid
index
won
't
be
needed
again.
Reuse
that
component
to
hold
the
z
index,
thus
saving
*
The
grid
index
won
't
be
needed
again.
Reuse
that
component
to
hold
the
z
index,
thus
saving
*
some
work
in
the
charge
spreading
kernel.
*
some
work
in
the
charge
spreading
kernel.
*/
*/
__kernel
void
recordZIndex
(
__global
int2*
restrict
pmeAtomGridIndex,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
__kernel
void
recordZIndex
(
__global
int2*
restrict
pmeAtomGridIndex,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
recipBoxVecZ
)
{
int
start
=
(
NUM_ATOMS*get_global_id
(
0
))
/get_global_size
(
0
)
;
int
start
=
(
NUM_ATOMS*get_global_id
(
0
))
/get_global_size
(
0
)
;
int
end
=
(
NUM_ATOMS*
(
get_global_id
(
0
)
+1
))
/get_global_size
(
0
)
;
int
end
=
(
NUM_ATOMS*
(
get_global_id
(
0
)
+1
))
/get_global_size
(
0
)
;
for
(
int
i
=
start
; i < end; ++i) {
for
(
int
i
=
start
; i < end; ++i) {
real
posz
=
posq[pmeAtomGridIndex[i].x].z
;
real
posz
=
posq[pmeAtomGridIndex[i].x].z
;
posz
-=
floor
(
posz*
invPeriodicBoxSize
.z
)
*periodicBoxSize.z
;
posz
-=
floor
(
posz*
recipBoxVecZ
.z
)
*periodicBoxSize.z
;
int
z
=
((
int
)
((
posz*
invPeriodicBoxSize
.z
)
*GRID_SIZE_Z
))
%
GRID_SIZE_Z
;
int
z
=
((
int
)
((
posz*
recipBoxVecZ
.z
)
*GRID_SIZE_Z
))
%
GRID_SIZE_Z
;
pmeAtomGridIndex[i].y
=
z
;
pmeAtomGridIndex[i].y
=
z
;
}
}
}
}
...
@@ -83,7 +86,7 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
...
@@ -83,7 +86,7 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
data[PME_ORDER]
;
...
@@ -93,12 +96,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -93,12 +96,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
int
atom
=
pmeAtomGridIndex[i].x
;
int
atom
=
pmeAtomGridIndex[i].x
;
real4
pos
=
posq[atom]
;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
real4
t
=
(
real4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0.0f
)
;
pos.z*recipBoxVecZ.z
)
;
t.x
=
(
t.x-floor
(
t.x
))
*GRID_SIZE_X
;
t.y
=
(
t.y-floor
(
t.y
))
*GRID_SIZE_Y
;
t.z
=
(
t.z-floor
(
t.z
))
*GRID_SIZE_Z
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
...
@@ -163,7 +169,7 @@ __kernel void finishSpreadCharge(__global long* restrict pmeGrid) {
...
@@ -163,7 +169,7 @@ __kernel void finishSpreadCharge(__global long* restrict pmeGrid) {
}
}
#
elif
defined
(
DEVICE_IS_CPU
)
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real2*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
__global
real2*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
if
(
firstx
==
lastx
)
...
@@ -177,12 +183,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -177,12 +183,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for
(
int
i
=
0
; i < NUM_ATOMS; i++) {
for
(
int
i
=
0
; i < NUM_ATOMS; i++) {
int
atom
=
i
;//pmeAtomGridIndex[i].x;
int
atom
=
i
;//pmeAtomGridIndex[i].x;
real4
pos
=
posq[atom]
;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
real4
t
=
(
real4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0.0f
)
;
pos.z*recipBoxVecZ.z
)
;
t.x
=
(
t.x-floor
(
t.x
))
*GRID_SIZE_X
;
t.y
=
(
t.y-floor
(
t.y
))
*GRID_SIZE_Y
;
t.z
=
(
t.z-floor
(
t.z
))
*GRID_SIZE_Z
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
...
@@ -290,7 +299,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -290,7 +299,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
#
endif
#
endif
__kernel
void
reciprocalConvolution
(
__global
real2*
restrict
pmeGrid,
__global
real*
restrict
energyBuffer,
__global
const
real*
restrict
pmeBsplineModuliX,
__kernel
void
reciprocalConvolution
(
__global
real2*
restrict
pmeGrid,
__global
real*
restrict
energyBuffer,
__global
const
real*
restrict
pmeBsplineModuliX,
__global
const
real*
restrict
pmeBsplineModuliY,
__global
const
real*
restrict
pmeBsplineModuliZ,
real4
invPeriodicBoxSize
,
real
recipScaleFactor
)
{
__global
const
real*
restrict
pmeBsplineModuliY,
__global
const
real*
restrict
pmeBsplineModuliZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
,
real
recipScaleFactor
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
real
energy
=
0.0f
;
real
energy
=
0.0f
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
...
@@ -303,9 +312,9 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
...
@@ -303,9 +312,9 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
int
mx
=
(
kx
<
(
GRID_SIZE_X+1
)
/2
)
?
kx
:
(
kx-GRID_SIZE_X
)
;
int
mx
=
(
kx
<
(
GRID_SIZE_X+1
)
/2
)
?
kx
:
(
kx-GRID_SIZE_X
)
;
int
my
=
(
ky
<
(
GRID_SIZE_Y+1
)
/2
)
?
ky
:
(
ky-GRID_SIZE_Y
)
;
int
my
=
(
ky
<
(
GRID_SIZE_Y+1
)
/2
)
?
ky
:
(
ky-GRID_SIZE_Y
)
;
int
mz
=
(
kz
<
(
GRID_SIZE_Z+1
)
/2
)
?
kz
:
(
kz-GRID_SIZE_Z
)
;
int
mz
=
(
kz
<
(
GRID_SIZE_Z+1
)
/2
)
?
kz
:
(
kz-GRID_SIZE_Z
)
;
real
mhx
=
mx*
invPeriodicBoxSize
.x
;
real
mhx
=
mx*
recipBoxVecX
.x
;
real
mhy
=
m
y*invPeriodicBoxSize
.y
;
real
mhy
=
m
x*recipBoxVecY.x+my*recipBoxVecY
.y
;
real
mhz
=
m
z*invPeriodicBoxSize
.z
;
real
mhz
=
m
x*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ
.z
;
real
bx
=
pmeBsplineModuliX[kx]
;
real
bx
=
pmeBsplineModuliX[kx]
;
real
by
=
pmeBsplineModuliY[ky]
;
real
by
=
pmeBsplineModuliY[ky]
;
real
bz
=
pmeBsplineModuliZ[kz]
;
real
bz
=
pmeBsplineModuliZ[kz]
;
...
@@ -320,7 +329,7 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
...
@@ -320,7 +329,7 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
}
}
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real2*
restrict
pmeGrid,
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real2*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
__global
int2*
restrict
pmeAtomGridIndex
)
{
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
,
__global
int2*
restrict
pmeAtomGridIndex
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
@@ -332,12 +341,15 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
...
@@ -332,12 +341,15 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
int
atom
=
pmeAtomGridIndex[i].x
;
int
atom
=
pmeAtomGridIndex[i].x
;
real4
force
=
0.0f
;
real4
force
=
0.0f
;
real4
pos
=
posq[atom]
;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
real4
t
=
(
real4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0.0f
)
;
pos.z*recipBoxVecZ.z
)
;
t.x
=
(
t.x-floor
(
t.x
))
*GRID_SIZE_X
;
t.y
=
(
t.y-floor
(
t.y
))
*GRID_SIZE_Y
;
t.z
=
(
t.z-floor
(
t.z
))
*GRID_SIZE_Z
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
...
@@ -385,9 +397,9 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
...
@@ -385,9 +397,9 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
}
}
real4
totalForce
=
forceBuffers[atom]
;
real4
totalForce
=
forceBuffers[atom]
;
real
q
=
pos.w*EPSILON_FACTOR
;
real
q
=
pos.w*EPSILON_FACTOR
;
totalForce.x
-=
q*force.x*GRID_SIZE_X*
invPeriodicBoxSize
.x
;
totalForce.x
-=
q*
(
force.x*GRID_SIZE_X*
recipBoxVecX
.x
)
;
totalForce.y
-=
q*force.y*GRID_SIZE_Y*
invPeriodicBoxSize
.y
;
totalForce.y
-=
q*
(
force.x*GRID_SIZE_X*recipBoxVecY.x+
force.y*GRID_SIZE_Y*
recipBoxVecY
.y
)
;
totalForce.z
-=
q*force.
z
*GRID_SIZE_
Z*invPeriodicBoxSize
.z
;
totalForce.z
-=
q*
(
force.
x
*GRID_SIZE_
X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ
.z
)
;
forceBuffers[atom]
=
totalForce
;
forceBuffers[atom]
=
totalForce
;
}
}
}
}
...
...
platforms/opencl/tests/TestOpenCLEwald.cpp
View file @
295ae001
...
@@ -201,6 +201,57 @@ void testEwald2Ions() {
...
@@ -201,6 +201,57 @@ void testEwald2Ions() {
ASSERT_EQUAL_TOL
(
-
217.276
,
state
.
getPotentialEnergy
(),
0.01
/*10*TOL*/
);
ASSERT_EQUAL_TOL
(
-
217.276
,
state
.
getPotentialEnergy
(),
0.01
/*10*TOL*/
);
}
}
void
testTriclinic
()
{
// Create a triclinic box containing eight particles.
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2.5
,
0
,
0
),
Vec3
(
0.5
,
3.0
,
0
),
Vec3
(
0.7
,
0.9
,
3.5
));
for
(
int
i
=
0
;
i
<
8
;
i
++
)
system
.
addParticle
(
1.0
);
NonbondedForce
*
force
=
new
NonbondedForce
();
system
.
addForce
(
force
);
force
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
force
->
setCutoffDistance
(
1.0
);
force
->
setPMEParameters
(
3.45891
,
32
,
40
,
48
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
-
1
,
0.440104
,
0.4184
);
// Cl parameters
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
1
,
0.332840
,
0.0115897
);
// Na parameters
vector
<
Vec3
>
positions
(
8
);
positions
[
0
]
=
Vec3
(
1.744
,
2.788
,
3.162
);
positions
[
1
]
=
Vec3
(
1.048
,
0.762
,
2.340
);
positions
[
2
]
=
Vec3
(
2.489
,
1.570
,
2.817
);
positions
[
3
]
=
Vec3
(
1.027
,
1.893
,
3.271
);
positions
[
4
]
=
Vec3
(
0.937
,
0.825
,
0.009
);
positions
[
5
]
=
Vec3
(
2.290
,
1.887
,
3.352
);
positions
[
6
]
=
Vec3
(
1.266
,
1.111
,
2.894
);
positions
[
7
]
=
Vec3
(
0.933
,
1.862
,
3.490
);
// Compute the forces and energy.
VerletIntegrator
integ
(
0.001
);
Context
context
(
system
,
integ
,
platform
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Compare them to values computed by Gromacs.
double
expectedEnergy
=
-
963.370
;
vector
<
Vec3
>
expectedForce
(
8
);
expectedForce
[
0
]
=
Vec3
(
4.25253e+01
,
-
1.23503e+02
,
1.22139e+02
);
expectedForce
[
1
]
=
Vec3
(
9.74752e+01
,
1.68213e+02
,
1.93169e+02
);
expectedForce
[
2
]
=
Vec3
(
-
1.50348e+02
,
1.29165e+02
,
3.70435e+02
);
expectedForce
[
3
]
=
Vec3
(
9.18644e+02
,
-
3.52571e+00
,
-
1.34772e+03
);
expectedForce
[
4
]
=
Vec3
(
-
1.61193e+02
,
9.01528e+01
,
-
7.12904e+01
);
expectedForce
[
5
]
=
Vec3
(
2.82630e+02
,
2.78029e+01
,
-
3.72864e+02
);
expectedForce
[
6
]
=
Vec3
(
-
1.47454e+02
,
-
2.14448e+02
,
-
3.55789e+02
);
expectedForce
[
7
]
=
Vec3
(
-
8.82195e+02
,
-
7.39132e+01
,
1.46202e+03
);
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
ASSERT_EQUAL_VEC
(
expectedForce
[
i
],
state
.
getForces
()[
i
],
1e-4
);
}
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
1e-4
);
}
void
testErrorTolerance
(
NonbondedForce
::
NonbondedMethod
method
)
{
void
testErrorTolerance
(
NonbondedForce
::
NonbondedMethod
method
)
{
// Create a cloud of random point charges.
// Create a cloud of random point charges.
...
@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
...
@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
testEwaldPME
(
false
);
testEwaldPME
(
false
);
testEwaldPME
(
true
);
testEwaldPME
(
true
);
// testEwald2Ions();
// testEwald2Ions();
testTriclinic
();
testErrorTolerance
(
NonbondedForce
::
Ewald
);
testErrorTolerance
(
NonbondedForce
::
Ewald
);
testErrorTolerance
(
NonbondedForce
::
PME
);
testErrorTolerance
(
NonbondedForce
::
PME
);
testPMEParameters
();
testPMEParameters
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment