Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cf4c6d52
"platforms/vscode:/vscode.git/clone" did not exist on "90ddfc312ce93b34ebaf857d2e4fbef0dbe8e075"
Commit
cf4c6d52
authored
Sep 21, 2011
by
Peter Eastman
Browse files
Optimizations to GB
parent
8b3c38f9
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
24 deletions
+64
-24
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+10
-4
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+54
-20
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
cf4c6d52
...
...
@@ -1544,6 +1544,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
}
else
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
index
=
0
;
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
()));
...
...
@@ -1563,6 +1565,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
}
else
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionIndices
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getExclusionRowIndices
().
getDeviceBuffer
());
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
gbsaObcReductions
,
defines
);
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
...
...
@@ -2304,12 +2308,14 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
cl
.
getNonbondedUtilities
().
addArgument
(
arguments
[
i
]);
}
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
if
(
useLong
)
cl
.
addAutoclearBuffer
(
longEnergyDerivs
->
getDeviceBuffer
(),
2
*
longEnergyDerivs
->
getSize
());
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
()
/
sizeof
(
cl_float
));
}
if
(
useLong
)
cl
.
addAutoclearBuffer
(
longEnergyDerivs
->
getDeviceBuffer
(),
2
*
longEnergyDerivs
->
getSize
());
}
}
double
OpenCLCalcCustomGBForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
cf4c6d52
...
...
@@ -23,10 +23,11 @@ __kernel void computeBornSum(
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData1*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
,
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
,
#
endif
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices
)
{
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
...
...
@@ -39,6 +40,8 @@ __kernel void computeBornSum(
#
endif
unsigned
int
lasty
=
0xFFFFFFFF
;
__local
int2
reservedBlocks[WARPS_PER_GROUP]
;
__local
unsigned
int*
exclusionRange
=
(
__local
unsigned
int*
)
reservedBlocks
;
__local
int
exclusionIndex[WARPS_PER_GROUP]
;
do
{
//
Extract
the
coordinates
of
this
tile
...
...
@@ -126,7 +129,18 @@ __kernel void computeBornSum(
localData[get_local_id(0)].bornSum = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions
bool computeSubset = false;
if (flags != 0xFFFFFFFF) {
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
computeSubset = (exclusionIndex[localGroupIndex] == -1);
}
if (computeSubset) {
if (flags == 0) {
// No interactions in this tile.
}
...
...
@@ -330,10 +344,11 @@ __kernel void computeGBSAForce1(
__global float* energyBuffer, __global float4* posq, __global float* global_bornRadii,
__local AtomData2* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags
) {
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags
,
#else
unsigned int numTiles
) {
unsigned int numTiles
,
#endif
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices) {
unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE;
#ifdef USE_CUTOFF
...
...
@@ -347,6 +362,8 @@ __kernel void computeGBSAForce1(
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
__local int2 reservedBlocks[WARPS_PER_GROUP];
__local unsigned int* exclusionRange = (__local unsigned int*) reservedBlocks;
__local int exclusionIndex[WARPS_PER_GROUP];
do {
// Extract the coordinates of this tile
...
...
@@ -393,6 +410,9 @@ __kernel void computeGBSAForce1(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float invR = RSQRT(r2);
float r = RECIP(invR);
float bornRadius2 = localData[tbx+j].bornRadius;
...
...
@@ -405,17 +425,13 @@ __kernel void computeGBSAForce1(
float Gpol = tempEnergy*RECIP(denominator2);
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
if (r2 > CUTOFF_SQUARED) {
dEdR = 0.0f;
tempEnergy = 0.0f;
dGpol_dalpha2_ij = 0.0f;
}
#endif
force.w += dGpol_dalpha2_ij*bornRadius2;
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
#ifdef USE_CUTOFF
}
#endif
}
}
}
...
...
@@ -437,7 +453,18 @@ __kernel void computeGBSAForce1(
localData[get_local_id(0)].fw = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions
bool computeSubset = false;
if (flags != 0xFFFFFFFF) {
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
computeSubset = (exclusionIndex[localGroupIndex] == -1);
}
if (computeSubset) {
if (flags == 0) {
// No interactions in this tile.
}
...
...
@@ -454,6 +481,9 @@ __kernel void computeGBSAForce1(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float invR = RSQRT(r2);
float r = RECIP(invR);
float bornRadius2 = localData[tbx+j].bornRadius;
...
...
@@ -480,6 +510,11 @@ __kernel void computeGBSAForce1(
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)] = (float4) (delta.xyz, dGpol_dalpha2_ij*bornRadius1);
#ifdef USE_CUTOFF
}
else
tempBuffer[get_local_id(0)] = (float4) 0.0f;
#endif
// Sum the forces on atom j.
...
...
@@ -512,6 +547,9 @@ __kernel void computeGBSAForce1(
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
float invR = RSQRT(r2);
float r = RECIP(invR);
float bornRadius2 = localData[tbx+tj].bornRadius;
...
...
@@ -524,13 +562,6 @@ __kernel void computeGBSAForce1(
float Gpol = tempEnergy*RECIP(denominator2);
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
if (r2 > CUTOFF_SQUARED) {
dEdR = 0.0f;
tempEnergy = 0.0f;
dGpol_dalpha2_ij = 0.0f;
}
#endif
force.w += dGpol_dalpha2_ij*bornRadius2;
energy += tempEnergy;
delta.xyz *= dEdR;
...
...
@@ -539,6 +570,9 @@ __kernel void computeGBSAForce1(
localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z;
localData[tbx+tj].fw += dGpol_dalpha2_ij*bornRadius1;
#ifdef USE_CUTOFF
}
#endif
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment