Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bf8b9f30
"openmmapi/vscode:/vscode.git/clone" did not exist on "6cc49dbe48ced91d1389346146c4aa5965d88604"
Commit
bf8b9f30
authored
Oct 07, 2011
by
Peter Eastman
Browse files
Fixed errors in GB on AMD GPUs
parent
b475e127
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
14 deletions
+22
-14
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+10
-6
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+12
-8
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
bf8b9f30
...
...
@@ -1532,7 +1532,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
longBornSum
->
getDeviceBuffer
()
:
bornSum
->
getDeviceBuffer
()));
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
params
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
nb
.
getForceThreadBlockSize
())
*
13
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
nb
.
getForceThreadBlockSize
())
*
7
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
1
:
nb
.
getForceThreadBlockSize
())
*
sizeof
(
cl_float
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
...
...
@@ -1544,8 +1544,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
}
else
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
if
(
cl
.
getSIMDWidth
()
==
32
)
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
}
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
index
=
0
;
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
()));
...
...
@@ -1553,7 +1555,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornRadii
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
nb
.
getForceThreadBlockSize
())
*
13
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
nb
.
getForceThreadBlockSize
())
*
9
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
1
:
nb
.
getForceThreadBlockSize
())
*
sizeof
(
mm_float4
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
...
...
@@ -1565,8 +1567,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
}
else
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
if
(
cl
.
getSIMDWidth
()
==
32
)
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
}
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
gbsaObcReductions
,
defines
);
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
bf8b9f30
...
...
@@ -3,12 +3,9 @@
typedef
struct
{
float
x,
y,
z
;
float
q
;
float
fx,
fy,
fz,
fw
;
float
radius,
scaledRadius
;
float
bornSum
;
float
bornRadius
;
float
bornForce
;
}
AtomData
;
}
AtomData1
;
/**
*
Compute
the
Born
sum.
...
...
@@ -16,7 +13,7 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__local
AtomData
1
*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles
)
{
#
else
...
...
@@ -196,14 +193,21 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
}
}
typedef struct {
float x, y, z;
float q;
float fx, fy, fz, fw;
float bornRadius;
} AtomData2;
/**
* First part of computing the GBSA interaction.
*/
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeGBSAForce1(__global float4* forceBuffers, __global float*
energyBuffer
,
__global float4* posq, __global float* global_bornRadii,
__global float* global_bornForce,
__local AtomData* localData, __local float4* tempBuffer,
void computeGBSAForce1(__global float4* forceBuffers, __global float*
global_bornForce
,
__global
float* energyBuffer, __global
float4* posq, __global float* global_bornRadii,
__local AtomData
2
* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles) {
#else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment