Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4ea10969
Commit
4ea10969
authored
Aug 08, 2013
by
peastman
Browse files
Finished OpenCL implementation of interaction groups
parent
54c0ca3f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
52 additions
and
6 deletions
+52
-6
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+14
-5
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+38
-1
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
4ea10969
...
...
@@ -2178,11 +2178,19 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
vector
<
mm_int4
>
groupData
;
for
(
int
tileSet
=
0
;
tileSet
<
numTileSets
;
tileSet
++
)
{
int
indexInTileSet
=
0
;
int
minSize
=
0
;
if
(
cl
.
getSIMDWidth
()
<
32
)
{
// We need to include a barrier inside the inner loop, so ensure that all
// threads will loop the same number of times.
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
minSize
=
max
(
minSize
,
(
int
)
atomLists
[
tiles
[
tileOrder
[
i
].
second
].
first
].
size
());
}
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
{
int
tile
=
tileOrder
[
i
].
second
;
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
int
range
=
indexInTileSet
+
((
indexInTileSet
+
atoms1
.
size
())
<<
16
);
int
range
=
indexInTileSet
+
((
indexInTileSet
+
max
(
minSize
,
(
int
)
atoms1
.
size
())
)
<<
16
);
int
allFlags
=
(
1
<<
atoms2
.
size
())
-
1
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms1
.
size
();
j
++
)
{
int
a1
=
atoms1
[
j
];
...
...
@@ -2193,7 +2201,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
indexInTileSet
+=
atoms1
.
size
();
}
for
(;
indexInTileSet
<
32
;
indexInTileSet
++
)
groupData
.
push_back
(
mm_int4
(
0
,
0
,
0
,
0
));
groupData
.
push_back
(
mm_int4
(
0
,
0
,
minSize
<<
16
,
0
));
}
interactionGroupData
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
->
upload
(
groupData
);
...
...
@@ -2242,7 +2250,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 = localData[localIndex].params"
<<
(
i
+
1
)
<<
";
\n
"
;
else
{
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 =
make_
"
<<
buffers
[
i
].
getType
()
<<
"("
;
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 =
(
"
<<
buffers
[
i
].
getType
()
<<
"
)
("
;
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
{
if
(
j
>
0
)
load2
<<
", "
;
...
...
@@ -2299,7 +2307,8 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
int
index
=
0
;
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getLongForceBuffer
().
getDeviceBuffer
());
bool
useLong
=
cl
.
getSupports64BitGlobalAtomics
();
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
()
:
cl
.
getForceBuffers
()).
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionGroupData
->
getDeviceBuffer
());
...
...
@@ -2310,7 +2319,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
if
(
globals
!=
NULL
)
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
}
int
forceThreadBlockSize
=
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
int
forceThreadBlockSize
=
max
(
32
,
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
)
;
cl
.
executeKernel
(
interactionGroupKernel
,
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
4ea10969
...
...
@@ -14,8 +14,37 @@ typedef struct {
#
endif
}
AtomData
;
/**
 * This function is used on devices that don't support 64 bit atomics.  Multiple threads within
 * a single tile might have computed forces on the same atom.  This loops over them and makes sure
 * that only one thread updates the force on any given atom.
 *
 * Before calling, each thread must have stored the force it wants to add in
 * localData[get_local_id(0)].fx/.fy/.fz.  The function overwrites localData[get_local_id(0)].x
 * with atomIndex so that the other threads of the tile can see which atom this thread targets.
 *
 * @param forceBuffers  global force array; the sum is added at index atomIndex + warp*PADDED_NUM_ATOMS,
 *                      where warp = get_global_id(0)/TILE_SIZE
 * @param localData     local-memory scratch array indexed by local thread id
 * @param atomIndex     index of the atom this thread's force applies to
 */
void writeForces(__global real4* forceBuffers, __local AtomData* localData, int atomIndex) {
    const int localId = get_local_id(0);

    // Publish the target atom of this thread.  NOTE(review): .x is reused here as an index slot;
    // this assumes the field can represent every atom index exactly -- confirm against AtomData.
    localData[localId].x = atomIndex;
    SYNC_WARPS;

    // Scan exactly the entries belonging to this thread's tile.  The end of the window is derived
    // from TILE_SIZE (rather than a literal 32) so that it always matches how `start` is computed.
    real4 forceSum = (real4) 0;
    const int start = (localId/TILE_SIZE)*TILE_SIZE;
    const int end = start+TILE_SIZE;
    bool isFirst = true;
    for (int i = start; i < end; i++) {
        if (localData[i].x == atomIndex) {
            forceSum += (real4) (localData[i].fx, localData[i].fy, localData[i].fz, 0);
            // A matching entry with a lower index means another thread is responsible for the write.
            isFirst &= (i >= localId);
        }
    }

    // Only the lowest-indexed thread targeting this atom writes the accumulated force.
    const unsigned int warp = get_global_id(0)/TILE_SIZE;
    const unsigned int offset = atomIndex + warp*PADDED_NUM_ATOMS;
    if (isFirst)
        forceBuffers[offset] += forceSum;

    // Synchronize again before the caller reuses localData for the next write.
    SYNC_WARPS;
}
__kernel
void
computeInteractionGroups
(
__global
long*
restrict
forceBuffers,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
...
...
@@ -78,6 +107,7 @@ __kernel void computeInteractionGroups(
tj
=
(
tj
==
rangeEnd-1
?
rangeStart
:
tj+1
)
;
SYNC_WARPS
;
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
if
(
exclusions
!=
0
)
{
atom_add
(
&forceBuffers[atom1],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
...
...
@@ -86,6 +116,13 @@ __kernel void computeInteractionGroups(
atom_add
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fy*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fz*0x100000000
))
;
}
#
else
writeForces
(
forceBuffers,
localData,
atom2
)
;
localData[get_local_id
(
0
)
].fx
=
force.x
;
localData[get_local_id
(
0
)
].fy
=
force.y
;
localData[get_local_id
(
0
)
].fz
=
force.z
;
writeForces
(
forceBuffers,
localData,
atom1
)
;
#
endif
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment