Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
db8a55b3
Commit
db8a55b3
authored
Dec 04, 2010
by
Peter Eastman
Browse files
Eliminated local memory bank conflicts
parent
72a8bb80
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
15 deletions
+32
-15
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-1
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+31
-14
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
db8a55b3
...
@@ -460,7 +460,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -460,7 +460,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionIndices
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionIndices
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionRowIndices
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionRowIndices
->
getDeviceBuffer
());
kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
*
localDataSize
:
OpenCLContext
::
ThreadBlockSize
*
localDataSize
),
NULL
);
kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
*
localDataSize
:
OpenCLContext
::
ThreadBlockSize
*
localDataSize
),
NULL
);
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
4
),
NULL
);
kernel
.
setArg
(
index
++
,
3
*
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
if
(
useCutoff
)
{
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
db8a55b3
...
@@ -13,7 +13,7 @@ typedef struct {
...
@@ -13,7 +13,7 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float
4
*
tempBuffer,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
#
else
#
else
...
@@ -180,29 +180,46 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -180,29 +180,46 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
float
tempEnergy
=
0.0f
;
float
tempEnergy
=
0.0f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
int
bufferIndex
=
3*get_local_id
(
0
)
;
#
ifdef
USE_SYMMETRIC
#
ifdef
USE_SYMMETRIC
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
tempBuffer[get_local_id
(
0
)
]
=
delta
;
tempBuffer[bufferIndex]
=
delta.x
;
tempBuffer[bufferIndex+1]
=
delta.y
;
tempBuffer[bufferIndex+2]
=
delta.z
;
#
else
#
else
force.xyz
-=
dEdR1.xyz
;
force.xyz
-=
dEdR1.xyz
;
tempBuffer[get_local_id
(
0
)
]
=
dEdR2
;
tempBuffer[bufferIndex]
=
dEdR2.x
;
tempBuffer[bufferIndex+1]
=
dEdR2.y
;
tempBuffer[bufferIndex+2]
=
dEdR2.z
;
#
endif
#
endif
//
Sum
the
forces
on
atom2.
//
Sum
the
forces
on
atom2.
if
(
tgx
%
2
==
0
)
if
(
tgx
%
2
==
0
)
{
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+1].xyz
;
tempBuffer[bufferIndex]
+=
tempBuffer[bufferIndex+3]
;
if
(
tgx
%
4
==
0
)
tempBuffer[bufferIndex+1]
+=
tempBuffer[bufferIndex+4]
;
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+2].xyz
;
tempBuffer[bufferIndex+2]
+=
tempBuffer[bufferIndex+5]
;
if
(
tgx
%
8
==
0
)
}
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+4].xyz
;
if
(
tgx
%
4
==
0
)
{
if
(
tgx
%
16
==
0
)
tempBuffer[bufferIndex]
+=
tempBuffer[bufferIndex+6]
;
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+8].xyz
;
tempBuffer[bufferIndex+1]
+=
tempBuffer[bufferIndex+7]
;
tempBuffer[bufferIndex+2]
+=
tempBuffer[bufferIndex+8]
;
}
if
(
tgx
%
8
==
0
)
{
tempBuffer[bufferIndex]
+=
tempBuffer[bufferIndex+12]
;
tempBuffer[bufferIndex+1]
+=
tempBuffer[bufferIndex+13]
;
tempBuffer[bufferIndex+2]
+=
tempBuffer[bufferIndex+14]
;
}
if
(
tgx
%
16
==
0
)
{
tempBuffer[bufferIndex]
+=
tempBuffer[bufferIndex+24]
;
tempBuffer[bufferIndex+1]
+=
tempBuffer[bufferIndex+25]
;
tempBuffer[bufferIndex+2]
+=
tempBuffer[bufferIndex+26]
;
}
if
(
tgx
==
0
)
{
if
(
tgx
==
0
)
{
localData[tbx+j].fx
+=
tempBuffer[
get_local_id
(
0
)
].x
+
tempBuffer[get_local_id
(
0
)
+16].x
;
localData[tbx+j].fx
+=
tempBuffer[
bufferIndex]
+
tempBuffer[bufferIndex+48]
;
localData[tbx+j].fy
+=
tempBuffer[
get_local_id
(
0
)
].y
+
tempBuffer[get_local_id
(
0
)
+16].y
;
localData[tbx+j].fy
+=
tempBuffer[
bufferIndex+1]
+
tempBuffer[bufferIndex+49]
;
localData[tbx+j].fz
+=
tempBuffer[
get_local_id
(
0
)
].z
+
tempBuffer[get_local_id
(
0
)
+16].z
;
localData[tbx+j].fz
+=
tempBuffer[
bufferIndex+2]
+
tempBuffer[bufferIndex+50]
;
}
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment