Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4ea10969
"platforms/cuda/vscode:/vscode.git/clone" did not exist on "f6c70d6e22d71da652480a2cb1fc38dc84ace8a6"
Commit
4ea10969
authored
Aug 08, 2013
by
peastman
Browse files
Finished OpenCL implementation of interaction groups
parent
54c0ca3f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
52 additions
and
6 deletions
+52
-6
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+14
-5
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+38
-1
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
4ea10969
...
@@ -2178,11 +2178,19 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
...
@@ -2178,11 +2178,19 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
vector
<
mm_int4
>
groupData
;
vector
<
mm_int4
>
groupData
;
for
(
int
tileSet
=
0
;
tileSet
<
numTileSets
;
tileSet
++
)
{
for
(
int
tileSet
=
0
;
tileSet
<
numTileSets
;
tileSet
++
)
{
int
indexInTileSet
=
0
;
int
indexInTileSet
=
0
;
int
minSize
=
0
;
if
(
cl
.
getSIMDWidth
()
<
32
)
{
// We need to include a barrier inside the inner loop, so ensure that all
// threads will loop the same number of times.
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
minSize
=
max
(
minSize
,
(
int
)
atomLists
[
tiles
[
tileOrder
[
i
].
second
].
first
].
size
());
}
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
{
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
{
int
tile
=
tileOrder
[
i
].
second
;
int
tile
=
tileOrder
[
i
].
second
;
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
int
range
=
indexInTileSet
+
((
indexInTileSet
+
atoms1
.
size
())
<<
16
);
int
range
=
indexInTileSet
+
((
indexInTileSet
+
max
(
minSize
,
(
int
)
atoms1
.
size
())
)
<<
16
);
int
allFlags
=
(
1
<<
atoms2
.
size
())
-
1
;
int
allFlags
=
(
1
<<
atoms2
.
size
())
-
1
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms1
.
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms1
.
size
();
j
++
)
{
int
a1
=
atoms1
[
j
];
int
a1
=
atoms1
[
j
];
...
@@ -2193,7 +2201,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
...
@@ -2193,7 +2201,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
indexInTileSet
+=
atoms1
.
size
();
indexInTileSet
+=
atoms1
.
size
();
}
}
for
(;
indexInTileSet
<
32
;
indexInTileSet
++
)
for
(;
indexInTileSet
<
32
;
indexInTileSet
++
)
groupData
.
push_back
(
mm_int4
(
0
,
0
,
0
,
0
));
groupData
.
push_back
(
mm_int4
(
0
,
0
,
minSize
<<
16
,
0
));
}
}
interactionGroupData
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
->
upload
(
groupData
);
interactionGroupData
->
upload
(
groupData
);
...
@@ -2242,7 +2250,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
...
@@ -2242,7 +2250,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 = localData[localIndex].params"
<<
(
i
+
1
)
<<
";
\n
"
;
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 = localData[localIndex].params"
<<
(
i
+
1
)
<<
";
\n
"
;
else
{
else
{
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 =
make_
"
<<
buffers
[
i
].
getType
()
<<
"("
;
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 =
(
"
<<
buffers
[
i
].
getType
()
<<
"
)
("
;
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
{
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
{
if
(
j
>
0
)
if
(
j
>
0
)
load2
<<
", "
;
load2
<<
", "
;
...
@@ -2299,7 +2307,8 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2299,7 +2307,8 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
if
(
!
hasInitializedKernel
)
{
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
hasInitializedKernel
=
true
;
int
index
=
0
;
int
index
=
0
;
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getLongForceBuffer
().
getDeviceBuffer
());
bool
useLong
=
cl
.
getSupports64BitGlobalAtomics
();
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
()
:
cl
.
getForceBuffers
()).
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionGroupData
->
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionGroupData
->
getDeviceBuffer
());
...
@@ -2310,7 +2319,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2310,7 +2319,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
if
(
globals
!=
NULL
)
if
(
globals
!=
NULL
)
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
}
}
int
forceThreadBlockSize
=
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
int
forceThreadBlockSize
=
max
(
32
,
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
)
;
cl
.
executeKernel
(
interactionGroupKernel
,
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
cl
.
executeKernel
(
interactionGroupKernel
,
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
4ea10969
...
@@ -14,8 +14,37 @@ typedef struct {
...
@@ -14,8 +14,37 @@ typedef struct {
#
endif
#
endif
}
AtomData
;
}
AtomData
;
/**
*
This
function
is
used
on
devices
that
don
't
support
64
bit
atomics.
Multiple
threads
within
*
a
single
tile
might
have
computed
forces
on
the
same
atom.
This
loops
over
them
and
makes
sure
*
that
only
one
thread
updates
the
force
on
any
given
atom.
*/
void
writeForces
(
__global
real4*
forceBuffers,__local
AtomData*
localData,
int
atomIndex
)
{
localData[get_local_id
(
0
)
].x
=
atomIndex
;
SYNC_WARPS
;
real4
forceSum
=
(
real4
)
0
;
int
start
=
(
get_local_id
(
0
)
/TILE_SIZE
)
*TILE_SIZE
;
int
end
=
start+32
;
bool
isFirst
=
true
;
for
(
int
i
=
start
; i < end; i++)
if
(
localData[i].x
==
atomIndex
)
{
forceSum
+=
(
real4
)
(
localData[i].fx,
localData[i].fy,
localData[i].fz,
0
)
;
isFirst
&=
(
i
>=
get_local_id
(
0
))
;
}
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
offset
=
atomIndex
+
warp*PADDED_NUM_ATOMS
;
if
(
isFirst
)
forceBuffers[offset]
+=
forceSum
;
SYNC_WARPS
;
}
__kernel
void
computeInteractionGroups
(
__kernel
void
computeInteractionGroups
(
__global
long*
restrict
forceBuffers,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
__global
const
int4*
restrict
groupData,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
...
@@ -78,6 +107,7 @@ __kernel void computeInteractionGroups(
...
@@ -78,6 +107,7 @@ __kernel void computeInteractionGroups(
tj
=
(
tj
==
rangeEnd-1
?
rangeStart
:
tj+1
)
;
tj
=
(
tj
==
rangeEnd-1
?
rangeStart
:
tj+1
)
;
SYNC_WARPS
;
SYNC_WARPS
;
}
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
if
(
exclusions
!=
0
)
{
if
(
exclusions
!=
0
)
{
atom_add
(
&forceBuffers[atom1],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[atom1],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
atom_add
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
...
@@ -86,6 +116,13 @@ __kernel void computeInteractionGroups(
...
@@ -86,6 +116,13 @@ __kernel void computeInteractionGroups(
atom_add
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fy*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fy*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fz*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fz*0x100000000
))
;
}
}
#
else
writeForces
(
forceBuffers,
localData,
atom2
)
;
localData[get_local_id
(
0
)
].fx
=
force.x
;
localData[get_local_id
(
0
)
].fy
=
force.y
;
localData[get_local_id
(
0
)
].fz
=
force.z
;
writeForces
(
forceBuffers,
localData,
atom1
)
;
#
endif
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment