Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
fcbee6a5
Commit
fcbee6a5
authored
May 11, 2016
by
Peter Eastman
Browse files
Fixed bug in PME with triclinic boxes
parent
a4d327f5
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
24 additions
and
26 deletions
+24
-26
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-0
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+3
-4
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+14
-14
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+6
-8
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
fcbee6a5
...
...
@@ -1944,6 +1944,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
sort->sort(*pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
...
...
platforms/cuda/src/kernels/pme.cu
View file @
fcbee6a5
...
...
@@ -18,7 +18,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
}
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
...
@@ -28,9 +29,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
fcbee6a5
...
...
@@ -1975,32 +1975,32 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBox
Size
Arg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setPeriodicBoxArg
s
(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
6
,
recipBoxVectors
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
7
,
recipBoxVectors
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
8
,
recipBoxVectors
[
2
]);
pmeSpreadChargeKernel.setArg<mm_double4>(
10
, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(
11
, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(
12
, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
recipBoxVectorsFloat
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
7
,
recipBoxVectorsFloat
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
8
,
recipBoxVectorsFloat
[
2
]);
pmeSpreadChargeKernel.setArg<mm_float4>(
10
, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(
11
, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(
12
, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBox
Size
Arg
(
cl
,
pmeSpreadChargeKernel
,
5
);
setPeriodicBoxArg
s
(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
6
,
recipBoxVectors
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
7
,
recipBoxVectors
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_double4
>
(
8
,
recipBoxVectors
[
2
]);
pmeSpreadChargeKernel.setArg<mm_double4>(
10
, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(
11
, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(
12
, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
6
,
recipBoxVectorsFloat
[
0
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
7
,
recipBoxVectorsFloat
[
1
]);
pmeSpreadChargeKernel
.
setArg
<
mm_float4
>
(
8
,
recipBoxVectorsFloat
[
2
]);
pmeSpreadChargeKernel.setArg<mm_float4>(
10
, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(
11
, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(
12
, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
...
...
platforms/opencl/src/kernels/pme.cl
View file @
fcbee6a5
...
...
@@ -83,7 +83,8 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
...
...
@@ -93,9 +94,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
int
atom
=
pmeAtomGridIndex[i].x
;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z
)
;
...
...
@@ -165,7 +164,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
}
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
...
...
@@ -179,9 +179,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for
(
int
i
=
0
; i < NUM_ATOMS; i++) {
int
atom
=
i
;//pmeAtomGridIndex[i].x;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment