Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cf4c6d52
Commit
cf4c6d52
authored
Sep 21, 2011
by
Peter Eastman
Browse files
Optimizations to GB
parent
8b3c38f9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
24 deletions
+64
-24
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+10
-4
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+54
-20
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
cf4c6d52
...
...
@@ -1544,6 +1544,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
}
else
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
index
=
0
;
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
()));
...
...
@@ -1563,6 +1565,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
}
else
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
gbsaObcReductions
,
defines
);
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
...
...
@@ -2304,12 +2308,14 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
cl
.
getNonbondedUtilities
().
addArgument
(
arguments
[
i
]);
}
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
()
/
sizeof
(
cl_float
));
}
if
(
useLong
)
cl
.
addAutoclearBuffer
(
longEnergyDerivs
->
getDeviceBuffer
(),
2
*
longEnergyDerivs
->
getSize
());
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
()
/
sizeof
(
cl_float
));
}
}
}
double
OpenCLCalcCustomGBForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
cf4c6d52
...
...
@@ -23,10 +23,11 @@ __kernel void computeBornSum(
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData1*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
,
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
,
#
endif
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices
)
{
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
...
...
@@ -39,6 +40,8 @@ __kernel void computeBornSum(
#
endif
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
int2
reservedBlocks[WARPS_PER_GROUP]
;
__local
unsigned
int*
exclusionRange
=
(
__local
unsigned
int*
)
reservedBlocks
;
__local
int
exclusionIndex[WARPS_PER_GROUP]
;
do
{
//
Extract
the
coordinates
of
this
tile
...
...
@@ -126,7 +129,18 @@ __kernel void computeBornSum(
localData[get_local_id(0)].bornSum = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions
bool computeSubset = false;
if (flags != 0xFFFFFFFF) {
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
computeSubset = (exclusionIndex[localGroupIndex] == -1);
}
if (computeSubset) {
if (flags == 0) {
// No interactions in this tile.
}
...
...
@@ -330,10 +344,11 @@ __kernel void computeGBSAForce1(
__global float* energyBuffer, __global float4* posq, __global float* global_bornRadii,
__local AtomData2* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags
) {
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags
,
#else
unsigned int numTiles
) {
unsigned int numTiles
,
#endif
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices) {
unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE;
#ifdef USE_CUTOFF
...
...
@@ -347,6 +362,8 @@ __kernel void computeGBSAForce1(
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
__local int2 reservedBlocks[WARPS_PER_GROUP];
__local unsigned int* exclusionRange = (__local unsigned int*) reservedBlocks;
__local int exclusionIndex[WARPS_PER_GROUP];
do {
// Extract the coordinates of this tile
...
...
@@ -393,6 +410,9 @@ __kernel void computeGBSAForce1(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float invR = RSQRT(r2);
float r = RECIP(invR);
float bornRadius2 = localData[tbx+j].bornRadius;
...
...
@@ -405,17 +425,13 @@ __kernel void computeGBSAForce1(
float Gpol = tempEnergy*RECIP(denominator2);
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
if (r2 > CUTOFF_SQUARED) {
dEdR = 0.0f;
tempEnergy = 0.0f;
dGpol_dalpha2_ij = 0.0f;
}
#endif
force.w += dGpol_dalpha2_ij*bornRadius2;
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
#ifdef USE_CUTOFF
}
#endif
}
}
}
...
...
@@ -437,7 +453,18 @@ __kernel void computeGBSAForce1(
localData[get_local_id(0)].fw = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions
bool computeSubset = false;
if (flags != 0xFFFFFFFF) {
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
computeSubset = (exclusionIndex[localGroupIndex] == -1);
}
if (computeSubset) {
if (flags == 0) {
// No interactions in this tile.
}
...
...
@@ -454,6 +481,9 @@ __kernel void computeGBSAForce1(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float invR = RSQRT(r2);
float r = RECIP(invR);
float bornRadius2 = localData[tbx+j].bornRadius;
...
...
@@ -480,6 +510,11 @@ __kernel void computeGBSAForce1(
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)] = (float4) (delta.xyz, dGpol_dalpha2_ij*bornRadius1);
#ifdef USE_CUTOFF
}
else
tempBuffer[get_local_id(0)] = (float4) 0.0f;
#endif
// Sum the forces on atom j.
...
...
@@ -512,6 +547,9 @@ __kernel void computeGBSAForce1(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float invR = RSQRT(r2);
float r = RECIP(invR);
float bornRadius2 = localData[tbx+tj].bornRadius;
...
...
@@ -524,13 +562,6 @@ __kernel void computeGBSAForce1(
float Gpol = tempEnergy*RECIP(denominator2);
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
if (r2 > CUTOFF_SQUARED) {
dEdR = 0.0f;
tempEnergy = 0.0f;
dGpol_dalpha2_ij = 0.0f;
}
#endif
force.w += dGpol_dalpha2_ij*bornRadius2;
energy += tempEnergy;
delta.xyz *= dEdR;
...
...
@@ -539,6 +570,9 @@ __kernel void computeGBSAForce1(
localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z;
localData[tbx+tj].fw += dGpol_dalpha2_ij*bornRadius1;
#ifdef USE_CUTOFF
}
#endif
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment