Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
91f3379b
Commit
91f3379b
authored
Sep 09, 2011
by
Peter Eastman
Browse files
Minor optimizations to GBSA
parent
d2a5b3bb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
25 additions
and
25 deletions
+25
-25
platforms/opencl/src/kernels/gbsaObc2.cl
platforms/opencl/src/kernels/gbsaObc2.cl
+3
-7
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+20
-16
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+2
-2
No files found.
platforms/opencl/src/kernels/gbsaObc2.cl
View file @
91f3379b
{
float
invRSquared
=
RECIP
(
r2
)
;
float
invRSquaredOver4
=
0.25f*invR*invR
;
float
rScaledRadiusJ
=
r+obcParams2.y
;
float
rScaledRadiusI
=
r+obcParams1.y
;
float
l_ijJ
=
RECIP
(
max
(
obcParams1.x,
fabs
(
r-obcParams2.y
)))
;
...
...
@@ -14,12 +14,8 @@
float
t1I
=
LOG
(
u_ijI*RECIP
(
l_ijI
))
;
float
t2J
=
(
l_ij2J-u_ij2J
)
;
float
t2I
=
(
l_ij2I-u_ij2I
)
;
float
t3J
=
t2J*invR
;
float
t3I
=
t2I*invR
;
t1J
*=
invR
;
t1I
*=
invR
;
float
term1
=
0.125f*
(
1.0f+obcParams2.y*obcParams2.y*invRSquared
)
*t3J
+
0.25f*t1J*invRSquared
;
float
term2
=
0.125f*
(
1.0f+obcParams1.y*obcParams1.y*invRSquared
)
*t3I
+
0.25f*t1I*invRSquared
;
float
term1
=
(
0.5f*
(
0.25f+obcParams2.y*obcParams2.y*invRSquaredOver4
)
*t2J
+
t1J*invRSquaredOver4
)
*invR
;
float
term2
=
(
0.5f*
(
0.25f+obcParams1.y*obcParams1.y*invRSquaredOver4
)
*t2I
+
t1I*invRSquaredOver4
)
*invR
;
float
tempdEdR
=
select
(
0.0f,
bornForce1*term1,
obcParams1.x
<
rScaledRadiusJ
)
;
tempdEdR
+=
select
(
0.0f,
bornForce2*term2,
obcParams2.x
<
rScaledRadiusJ
)
;
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
91f3379b
...
...
@@ -7,12 +7,9 @@
typedef
struct
{
float
x,
y,
z
;
float
q
;
float
fx,
fy,
fz,
fw
;
float
radius,
scaledRadius
;
float
bornSum
;
float
bornRadius
;
float
bornForce
;
}
AtomData
;
}
AtomData1
;
/**
*
Compute
the
Born
sum.
...
...
@@ -24,7 +21,7 @@ __kernel void computeBornSum(
__global
float*
global_bornSum,
#
endif
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__local
AtomData1*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
#
else
...
...
@@ -104,8 +101,8 @@ __kernel void computeBornSum(
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = LOG(u_ij * RECIP(l_ij));
bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) +
(0.50f*invR*ratio) +
(
0.25f*
params2.y*params2.y*invR)*(l_ij2-u_ij2);
bornSum += l_ij - u_ij +
(0.50f*invR*ratio) +
0.25f*
(
r*(u_ij2-l_ij2) +
(params2.y*params2.y*invR)*(l_ij2-u_ij2)
)
;
if (params1.x < params2.x-r)
bornSum += 2.0f*(RECIP(params1.x)-l_ij);
}
...
...
@@ -161,8 +158,8 @@ __kernel void computeBornSum(
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = LOG(u_ij * RECIP(l_ij));
bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) +
(0.50f*invR*ratio) +
(
0.25f*
params2.y*params2.y*invR)*(l_ij2-u_ij2);
bornSum += l_ij - u_ij +
(0.50f*invR*ratio) +
0.25f*
(
r*(u_ij2-l_ij2) +
(params2.y*params2.y*invR)*(l_ij2-u_ij2)
)
;
if (params1.x < params2.x-r)
bornSum += 2.0f*(RECIP(params1.x)-l_ij);
}
...
...
@@ -173,8 +170,8 @@ __kernel void computeBornSum(
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = LOG(u_ij * RECIP(l_ij));
float term = l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) +
(0.50f*invR*ratio) +
(
0.25f*
params1.y*params1.y*invR)*(l_ij2-u_ij2);
float term = l_ij - u_ij +
(0.50f*invR*ratio) +
0.25f*
(
r*(u_ij2-l_ij2) +
(params1.y*params1.y*invR)*(l_ij2-u_ij2)
)
;
if (params2.x < params1.x-r)
term += 2.0f*(RECIP(params2.x)-l_ij);
tempBuffer[get_local_id(0)] = term;
...
...
@@ -220,8 +217,8 @@ __kernel void computeBornSum(
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = LOG(u_ij * RECIP(l_ij));
bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) +
(0.50f*invR*ratio) +
(
0.25f*
params2.y*params2.y*invR)*(l_ij2-u_ij2);
bornSum += l_ij - u_ij +
(0.50f*invR*ratio) +
0.25f*
(
r*(u_ij2-l_ij2) +
(params2.y*params2.y*invR)*(l_ij2-u_ij2)
)
;
if (params1.x < params2.x-r)
bornSum += 2.0f*(RECIP(params1.x)-l_ij);
}
...
...
@@ -232,8 +229,8 @@ __kernel void computeBornSum(
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = LOG(u_ij * RECIP(l_ij));
float term = l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) +
(0.50f*invR*ratio) +
(
0.25f*
params1.y*params1.y*invR)*(l_ij2-u_ij2);
float term = l_ij - u_ij +
(0.50f*invR*ratio) +
0.25f*
(
r*(u_ij2-l_ij2) +
(params1.y*params1.y*invR)*(l_ij2-u_ij2)
)
;
if (params2.x < params1.x-r)
term += 2.0f*(RECIP(params2.x)-l_ij);
localData[tbx+tj].bornSum += term;
...
...
@@ -313,6 +310,13 @@ __kernel void computeBornSum(
} while (pos < end);
}
typedef struct {
float x, y, z;
float q;
float fx, fy, fz, fw;
float bornRadius;
} AtomData2;
/**
* First part of computing the GBSA interaction.
*/
...
...
@@ -324,7 +328,7 @@ __kernel void computeGBSAForce1(
__global float4* forceBuffers, __global float* global_bornForce,
#endif
__global float* energyBuffer, __global float4* posq, __global float* global_bornRadii,
__local AtomData* localData, __local float4* tempBuffer,
__local AtomData2* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags) {
#else
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
91f3379b
...
...
@@ -159,7 +159,7 @@ __kernel void computeNonbonded(
else {
// Compute only a subset of the interactions in this tile.
for (
unsigned int
j = 0; j < TILE_SIZE; j++) {
for (j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
bool isExcluded = false;
int atom2 = tbx+j;
...
...
@@ -230,7 +230,7 @@ __kernel void computeNonbonded(
excl = (excl >> tgx) |
(
excl
<<
(
TILE_SIZE
-
tgx
))
;
#
endif
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment