Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e7a00c6a
Commit
e7a00c6a
authored
May 23, 2012
by
Peter Eastman
Browse files
Fixed a potential race condition in PME
parent
a3db0217
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
53 additions
and
17 deletions
+53
-17
platforms/cuda/src/kernels/kCalculatePME.cu
platforms/cuda/src/kernels/kCalculatePME.cu
+29
-8
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+8
-1
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+1
-0
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+15
-8
No files found.
platforms/cuda/src/kernels/kCalculatePME.cu
View file @
e7a00c6a
...
@@ -205,14 +205,6 @@ void kFindAtomRangeForGrid_kernel()
...
@@ -205,14 +205,6 @@ void kFindAtomRangeForGrid_kernel()
cSim
.
pPmeAtomRange
[
j
]
=
i
;
cSim
.
pPmeAtomRange
[
j
]
=
i
;
last
=
gridIndex
;
last
=
gridIndex
;
}
}
// The grid index won't be needed again. Reuse that component to hold the z index, thus saving
// some work in the charge spreading kernel.
float
posz
=
cSim
.
pPosq
[
atomData
.
x
].
z
;
posz
-=
floorf
(
posz
*
cSim
.
invPeriodicBoxSizeZ
)
*
cSim
.
periodicBoxSizeZ
;
int
z
=
((
int
)
((
posz
*
cSim
.
invPeriodicBoxSizeZ
)
*
cSim
.
pmeGridSize
.
z
))
%
cSim
.
pmeGridSize
.
z
;
cSim
.
pPmeAtomGridIndex
[
i
].
y
=
z
;
}
}
// Fill in values beyond the last atom.
// Fill in values beyond the last atom.
...
@@ -225,6 +217,33 @@ void kFindAtomRangeForGrid_kernel()
...
@@ -225,6 +217,33 @@ void kFindAtomRangeForGrid_kernel()
}
}
}
}
/**
 * Overwrite the .y component of every pPmeAtomGridIndex entry with that atom's z grid index.
 * The grid index stored there won't be needed again, so the slot is reused to save
 * some work in the charge spreading kernel.
 *
 * Each thread handles one contiguous slice of the cSim.atoms entries; the slices are
 * obtained by dividing the entry range evenly over all threads of the launch.
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kRecordZIndex_kernel()
{
    // Flat index of this thread within the launch, and the total number of threads launched.
    int globalThread = blockIdx.x*blockDim.x+threadIdx.x;
    const unsigned int launchedThreads = blockDim.x*gridDim.x;

    // Half-open range [entry, stop) of sorted entries owned by this thread.
    int entry = (cSim.atoms*globalThread)/launchedThreads;
    const int stop = (cSim.atoms*(globalThread+1))/launchedThreads;

    while (entry < stop)
    {
        // .x is used as the index into pPosq for this entry.
        int2 gridEntry = cSim.pPmeAtomGridIndex[entry];
        float wrappedZ = cSim.pPosq[gridEntry.x].z;

        // Shift the z coordinate by a whole number of box lengths
        // (assumes invPeriodicBoxSizeZ == 1/periodicBoxSizeZ -- TODO confirm).
        wrappedZ -= floorf(wrappedZ*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;

        // Scale the fractional coordinate to grid units; the modulo folds the result
        // back below pmeGridSize.z.
        int zIndex = ((int) ((wrappedZ*cSim.invPeriodicBoxSizeZ)*cSim.pmeGridSize.z)) % cSim.pmeGridSize.z;
        cSim.pPmeAtomGridIndex[entry].y = zIndex;
        ++entry;
    }
}
__global__
__global__
void
kGridSpreadCharge_kernel
()
void
kGridSpreadCharge_kernel
()
{
{
...
@@ -392,6 +411,8 @@ void kCalculatePME(gpuContext gpu)
...
@@ -392,6 +411,8 @@ void kCalculatePME(gpuContext gpu)
bbSort
(
gpu
->
psPmeAtomGridIndex
->
_pDevData
,
gpu
->
natoms
);
bbSort
(
gpu
->
psPmeAtomGridIndex
->
_pDevData
,
gpu
->
natoms
);
kFindAtomRangeForGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kFindAtomRangeForGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindAtomRangeForGrid"
);
LAUNCHERROR
(
"kFindAtomRangeForGrid"
);
kRecordZIndex_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kRecordZIndex"
);
kGridSpreadCharge_kernel
<<<
16
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadCharge_kernel
<<<
16
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadCharge"
);
LAUNCHERROR
(
"kGridSpreadCharge"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
e7a00c6a
...
@@ -1263,6 +1263,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1263,6 +1263,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
::
Program
program
=
cl
.
createProgram
(
file
,
pmeDefines
);
cl
::
Program
program
=
cl
.
createProgram
(
file
,
pmeDefines
);
pmeUpdateBsplinesKernel
=
cl
::
Kernel
(
program
,
"updateBsplines"
);
pmeUpdateBsplinesKernel
=
cl
::
Kernel
(
program
,
"updateBsplines"
);
pmeAtomRangeKernel
=
cl
::
Kernel
(
program
,
"findAtomRangeForGrid"
);
pmeAtomRangeKernel
=
cl
::
Kernel
(
program
,
"findAtomRangeForGrid"
);
pmeZIndexKernel
=
cl
::
Kernel
(
program
,
"recordZIndex"
);
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
pmeSpreadChargeKernel
=
cl
::
Kernel
(
program
,
"gridSpreadCharge"
);
pmeConvolutionKernel
=
cl
::
Kernel
(
program
,
"reciprocalConvolution"
);
pmeConvolutionKernel
=
cl
::
Kernel
(
program
,
"reciprocalConvolution"
);
pmeInterpolateForceKernel
=
cl
::
Kernel
(
program
,
"gridInterpolateForce"
);
pmeInterpolateForceKernel
=
cl
::
Kernel
(
program
,
"gridInterpolateForce"
);
...
@@ -1275,6 +1276,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1275,6 +1276,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeAtomRange
->
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeAtomRange
->
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeAtomRangeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeZIndexKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeZIndexKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getPosq
().
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeAtomGridIndex
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeAtomRange
->
getDeviceBuffer
());
pmeSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeAtomRange
->
getDeviceBuffer
());
...
@@ -1334,8 +1337,12 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1334,8 +1337,12 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
(),
PmeOrder
*
PmeOrder
*
PmeOrder
);
cl
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
pmeGrid
->
getSize
());
cl
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
pmeGrid
->
getSize
());
}
}
else
else
{
pmeZIndexKernel
.
setArg
<
mm_float4
>
(
2
,
boxSize
);
pmeZIndexKernel
.
setArg
<
mm_float4
>
(
3
,
invBoxSize
);
cl
.
executeKernel
(
pmeZIndexKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeSpreadChargeKernel
,
cl
.
getNumAtoms
());
}
}
}
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
fft
->
execFFT
(
*
pmeGrid
,
*
pmeGrid2
,
true
);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
pmeConvolutionKernel
.
setArg
<
mm_float4
>
(
5
,
invBoxSize
);
...
...
platforms/opencl/src/OpenCLKernels.h
View file @
e7a00c6a
...
@@ -553,6 +553,7 @@ private:
...
@@ -553,6 +553,7 @@ private:
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeAtomRangeKernel
;
cl
::
Kernel
pmeZIndexKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeUpdateBsplinesKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
...
...
platforms/opencl/src/kernels/pme.cl
View file @
e7a00c6a
...
@@ -51,14 +51,6 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
...
@@ -51,14 +51,6 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
pmeAtomRange[j]
=
i
;
pmeAtomRange[j]
=
i
;
last
=
gridIndex
;
last
=
gridIndex
;
}
}
//
The
grid
index
won
't
be
needed
again.
Reuse
that
component
to
hold
the
z
index,
thus
saving
//
some
work
in
the
charge
spreading
kernel.
float
posz
=
posq[pmeAtomGridIndex[i].x].z
;
posz
-=
floor
(
posz*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
int
z
=
((
int
)
((
posz*invPeriodicBoxSize.z
)
*GRID_SIZE_Z
))
%
GRID_SIZE_Z
;
pmeAtomGridIndex[i].y
=
z
;
}
}
//
Fill
in
values
beyond
the
last
atom.
//
Fill
in
values
beyond
the
last
atom.
...
@@ -70,6 +62,21 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
...
@@ -70,6 +62,21 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
}
}
}
}
/**
 * Overwrite the .y component of every pmeAtomGridIndex entry with that atom's z grid index.
 * The grid index stored there won't be needed again, so the slot is reused to save
 * some work in the charge spreading kernel.
 *
 * Each work-item handles one contiguous slice of the NUM_ATOMS entries; the slices are
 * obtained by dividing the entry range evenly over the global work size.
 */
__kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global const float4* restrict posq, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
    // Half-open range [entry, stop) of entries owned by this work-item.
    int entry = (NUM_ATOMS*get_global_id(0))/get_global_size(0);
    const int stop = (NUM_ATOMS*(get_global_id(0)+1))/get_global_size(0);

    while (entry < stop) {
        // .x is used as the index into posq for this entry.
        float wrappedZ = posq[pmeAtomGridIndex[entry].x].z;

        // Shift the z coordinate by a whole number of box lengths
        // (assumes invPeriodicBoxSize.z == 1/periodicBoxSize.z -- TODO confirm).
        wrappedZ -= floor(wrappedZ*invPeriodicBoxSize.z)*periodicBoxSize.z;

        // Scale the fractional coordinate to grid units; the modulo folds the result
        // back below GRID_SIZE_Z.
        int zIndex = ((int) ((wrappedZ*invPeriodicBoxSize.z)*GRID_SIZE_Z)) % GRID_SIZE_Z;
        pmeAtomGridIndex[entry].y = zIndex;
        ++entry;
    }
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment