Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
dca54ec7
Commit
dca54ec7
authored
Jun 30, 2016
by
Saurabh Belsare
Browse files
Merged fork with latest original master
parents
cace5edf
01f9e415
Changes
384
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
322 additions
and
302 deletions
+322
-302
platforms/opencl/src/kernels/cmapTorsionForce.cl
platforms/opencl/src/kernels/cmapTorsionForce.cl
+10
-0
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+4
-0
platforms/opencl/src/kernels/customCentroidBond.cl
platforms/opencl/src/kernels/customCentroidBond.cl
+5
-2
platforms/opencl/src/kernels/customCompoundBond.cl
platforms/opencl/src/kernels/customCompoundBond.cl
+4
-1
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+28
-30
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+22
-24
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+28
-30
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+22
-24
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+6
-3
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+56
-60
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+44
-48
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+28
-30
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+22
-24
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+20
-17
platforms/opencl/src/kernels/torsionForce.cl
platforms/opencl/src/kernels/torsionForce.cl
+5
-0
platforms/opencl/tests/OpenCLTests.h
platforms/opencl/tests/OpenCLTests.h
+3
-3
platforms/opencl/tests/TestOpenCLCheckpoints.cpp
platforms/opencl/tests/TestOpenCLCheckpoints.cpp
+9
-0
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+2
-2
platforms/opencl/tests/TestOpenCLRandom.cpp
platforms/opencl/tests/TestOpenCLRandom.cpp
+2
-2
platforms/opencl/tests/TestOpenCLSort.cpp
platforms/opencl/tests/TestOpenCLSort.cpp
+2
-2
No files found.
platforms/opencl/src/kernels/cmapTorsionForce.cl
View file @
dca54ec7
...
...
@@ -5,6 +5,11 @@ const real PI = 3.14159265358979323846f;
real4
v0a
=
(
real4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
real4
v1a
=
(
real4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
real4
v2a
=
(
real4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
#
if
APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0a
)
APPLY_PERIODIC_TO_DELTA
(
v1a
)
APPLY_PERIODIC_TO_DELTA
(
v2a
)
#
endif
real4
cp0a
=
cross
(
v0a,
v1a
)
;
real4
cp1a
=
cross
(
v1a,
v2a
)
;
real
cosangle
=
dot
(
normalize
(
cp0a
)
,
normalize
(
cp1a
))
;
...
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0b)
APPLY_PERIODIC_TO_DELTA(v1b)
APPLY_PERIODIC_TO_DELTA(v2b)
#endif
real4 cp0b = cross(v0b, v1b);
real4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
dca54ec7
...
...
@@ -30,6 +30,10 @@
tempForce
=
-prefactor*
(
erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
-prefactor*erfAlphaR
;
}
else
{
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
}
}
else
{
#
if
HAS_LENNARD_JONES
...
...
platforms/opencl/src/kernels/customCentroidBond.cl
View file @
dca54ec7
...
...
@@ -70,8 +70,11 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
real4
delta
(
real4
vec1,
real4
vec2
)
{
real4
delta
(
real4
vec1,
real4
vec2,
bool
periodic,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
...
...
@@ -110,7 +113,7 @@ real4 computeCross(real4 vec1, real4 vec2) {
*
Compute
the
forces
on
groups
based
on
the
bonds.
*/
__kernel
void
computeGroupForces
(
__global
long*
restrict
groupForce,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
centerPositions,
__global
const
int*
restrict
bondGroups
__global
const
int*
restrict
bondGroups
,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
EXTRA_ARGS
)
{
mixed
energy
=
0
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
...
...
platforms/opencl/src/kernels/customCompoundBond.cl
View file @
dca54ec7
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
real4
ccb_delta
(
real4
vec1,
real4
vec2
)
{
real4
ccb_delta
(
real4
vec1,
real4
vec2,
bool
periodic,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
dca54ec7
...
...
@@ -181,6 +181,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
...
...
@@ -204,42 +206,38 @@ __kernel void computeN2Energy(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
#
endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
dca54ec7
...
...
@@ -201,6 +201,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
...
...
@@ -220,35 +222,31 @@ __kernel void computeN2Energy(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
nextToSkip
=
end
;
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
else
nextToSkip
=
end
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
#
endif
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
dca54ec7
...
...
@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
...
...
@@ -178,42 +180,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
#
endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
dca54ec7
...
...
@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
...
...
@@ -188,35 +190,31 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
nextToSkip
=
end
;
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
else
nextToSkip
=
end
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
#
endif
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
dca54ec7
...
...
@@ -55,7 +55,7 @@ inline real4 computeCross(real4 vec1, real4 vec2) {
/**
*
Determine
whether
a
particular
interaction
is
in
the
list
of
exclusions.
*/
inline
bool
isInteractionExcluded
(
int
atom1,
int
atom2,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
)
{
inline
bool
isInteractionExcluded
(
int
atom1,
int
atom2,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
)
{
int
first
=
exclusionStartIndex[atom1]
;
int
last
=
exclusionStartIndex[atom1+1]
;
for
(
int
i
=
last-1
; i >= first; i--) {
...
...
@@ -174,7 +174,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
int2*
restrict
neighborPairs,
__global
int*
restrict
numNeighborPairs,
__global
int*
restrict
numNeighborsForAtom,
int
maxNeighborPairs
#
ifdef
USE_EXCLUSIONS
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
#
endif
)
{
__local
real4
positionCache[FIND_NEIGHBORS_WORKGROUP_SIZE]
;
...
...
@@ -264,7 +264,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
}
}
}
numNeighborsForAtom[atom1]
=
totalNeighborsForAtom1
;
if
(
atom1
<
NUM_ATOMS
)
numNeighborsForAtom[atom1]
=
totalNeighborsForAtom1
;
SYNC_WARPS
;
}
}
...
...
@@ -307,6 +309,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor
numNeighborsForAtom[globalIndex]
=
0
; // Clear this so the next kernel can use it as a counter
}
globalOffset
+=
posBuffer[get_local_size
(
0
)
-1]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
get_local_id
(
0
)
==
0
)
neighborStartIndex[0]
=
0
;
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
dca54ec7
...
...
@@ -169,6 +169,8 @@ __kernel void computeBornSum(
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
if
(
numTiles
>
maxTiles
)
return
; // There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
int
end
=
(
int
)
((
warp+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
(
long
)
numTiles
)
/totalWarps
)
;
#
else
...
...
@@ -190,42 +192,38 @@ __kernel void computeBornSum(
int
x,
y
;
bool
singlePeriodicCopy
=
false
;
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles[pos]
;
real4
blockSizeX
=
blockSize[x]
;
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
}
else
#
endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
tiles[pos]
;
real4
blockSizeX
=
blockSize[x]
;
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
#
else
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS;
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
...
...
@@ -556,6 +554,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
...
...
@@ -577,42 +577,38 @@ __kernel void computeGBSAForce1(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
#
endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
dca54ec7
...
...
@@ -178,6 +178,8 @@ __kernel void computeBornSum(
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
if
(
numTiles
>
maxTiles
)
return
; // There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
get_group_id
(
0
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
))
;
int
end
=
(
int
)
((
get_group_id
(
0
)
+1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS*
((
long
)
NUM_BLOCKS+1
)
/2
:
numTiles
)
/get_num_groups
(
0
))
;
#
else
...
...
@@ -196,35 +198,31 @@ __kernel void computeBornSum(
int
x,
y
;
bool
singlePeriodicCopy
=
false
;
#
ifdef
USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles[pos]
;
real4
blockSizeX
=
blockSize[x]
;
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
}
else
#
endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
tiles[pos]
;
real4
blockSizeX
=
blockSize[x]
;
singlePeriodicCopy
=
(
0.5f*periodicBoxSize.x-blockSizeX.x
>=
CUTOFF
&&
0.5f*periodicBoxSize.y-blockSizeX.y
>=
CUTOFF
&&
0.5f*periodicBoxSize.z-blockSizeX.z
>=
CUTOFF
)
;
#
else
y
=
(
int
)
floor
(
NUM_BLOCKS+0.5f-SQRT
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if
(
x
<
y
|
| x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
includeTile = (nextToSkip != pos);
else
nextToSkip = end;
}
includeTile = (nextToSkip != pos);
#endif
if (includeTile) {
// Load the data for this tile.
...
...
@@ -593,6 +591,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
...
...
@@ -611,35 +611,31 @@ __kernel void computeGBSAForce1(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
nextToSkip
=
end
;
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
else
nextToSkip
=
end
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
#
endif
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
dca54ec7
...
...
@@ -200,6 +200,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? startTileIndex+warp*(long)numTileIndices/totalWarps : warp*(long)numTiles/totalWarps);
int end = (int) (numTiles > maxTiles ? startTileIndex+(warp+1)*(long)numTileIndices/totalWarps : (warp+1)*(long)numTiles/totalWarps);
#else
...
...
@@ -223,42 +225,38 @@ __kernel void computeNonbonded(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS;
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
dca54ec7
...
...
@@ -214,6 +214,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
#else
...
...
@@ -234,35 +236,31 @@ __kernel void computeNonbonded(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
includeTile = (nextToSkip != pos);
else
nextToSkip = end;
}
includeTile = (nextToSkip != pos);
#endif
if (includeTile) {
// Load the data for this tile.
...
...
platforms/opencl/src/kernels/pme.cl
View file @
dca54ec7
__kernel
void
updateBsplines
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
pmeBsplineTheta,
__local
real4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
__global
int2*
restrict
pmeAtomGridIndex,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
real4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
real4
pos
=
posq[i]
;
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z
)
;
...
...
@@ -83,8 +85,9 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
//
Process
the
atoms
in
spatially
sorted
order.
This
improves
efficiency
when
writing
...
...
@@ -93,9 +96,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
int
atom
=
pmeAtomGridIndex[i].x
;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z
)
;
...
...
@@ -118,7 +119,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
data[0]
=
div*
(
-dr+1.0f
)
*data[0]
;
}
data[PME_ORDER-1]
=
scale*dr*data[PME_ORDER-2]
;
for
(
int
j
=
1
; j < (PME_ORDER-1); j++)
...
...
@@ -165,7 +166,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
}
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
...
...
@@ -179,9 +181,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for
(
int
i
=
0
; i < NUM_ATOMS; i++) {
int
atom
=
i
;//pmeAtomGridIndex[i].x;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z
)
;
...
...
@@ -362,12 +362,17 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
#
ifdef
USE_PME_STREAM
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
#
else
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
#
endif
}
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
...
@@ -378,9 +383,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
int
atom
=
pmeAtomGridIndex[i].x
;
real4
force
=
0.0f
;
real4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*recipBoxVecX.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*recipBoxVecY.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*recipBoxVecZ.z
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z
)
;
...
...
@@ -403,7 +406,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
data[0]
=
div*
(
-dr+1.0f
)
*data[0]
;
}
ddata[0]
=
-data[0]
;
for
(
int
j
=
1
; j < PME_ORDER; j++)
...
...
platforms/opencl/src/kernels/torsionForce.cl
View file @
dca54ec7
...
...
@@ -2,6 +2,11 @@ const real PI = 3.14159265358979323846f;
real4
v0
=
(
real4
)
(
pos1.xyz-pos2.xyz,
0.0f
)
;
real4
v1
=
(
real4
)
(
pos3.xyz-pos2.xyz,
0.0f
)
;
real4
v2
=
(
real4
)
(
pos3.xyz-pos4.xyz,
0.0f
)
;
#
if
APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
APPLY_PERIODIC_TO_DELTA
(
v2
)
#
endif
real4
cp0
=
cross
(
v0,
v1
)
;
real4
cp1
=
cross
(
v1,
v2
)
;
real
cosangle
=
dot
(
normalize
(
cp0
)
,
normalize
(
cp1
))
;
...
...
platforms/opencl/tests/OpenCLTests.h
View file @
dca54ec7
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors.
*
* Portions copyright (c) 2015
-2016
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -39,9 +39,9 @@ OpenMM::OpenCLPlatform platform;
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"
OpenCL
Precision"
,
std
::
string
(
argv
[
1
]));
platform
.
setPropertyDefaultValue
(
"Precision"
,
std
::
string
(
argv
[
1
]));
if
(
argc
>
2
)
platform
.
setPropertyDefaultValue
(
"OpenCLPlatformIndex"
,
std
::
string
(
argv
[
2
]));
if
(
argc
>
3
)
platform
.
setPropertyDefaultValue
(
"
OpenCL
DeviceIndex"
,
std
::
string
(
argv
[
3
]));
platform
.
setPropertyDefaultValue
(
"DeviceIndex"
,
std
::
string
(
argv
[
3
]));
}
platforms/opencl/tests/TestOpenCLCheckpoints.cpp
View file @
dca54ec7
...
...
@@ -120,6 +120,15 @@ void testCheckpoint() {
integrator2
.
step
(
10
);
State
s8
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s6
,
s8
);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator
integrator3
(
0.001
);
Context
context3
(
system
,
integrator3
,
platform
);
stream1
.
seekg
(
0
,
stream1
.
beg
);
context3
.
loadCheckpoint
(
stream1
);
State
s9
=
context3
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
|
State
::
Energy
);
compareStates
(
s1
,
s9
);
}
void
runPlatformTests
()
{
...
...
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
dca54ec7
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -54,7 +54,7 @@ template <class Real2>
void
testTransform
(
bool
realToComplex
,
int
xsize
,
int
ysize
,
int
zsize
)
{
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/opencl/tests/TestOpenCLRandom.cpp
View file @
dca54ec7
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-20
09
Stanford University and the Authors. *
* Portions copyright (c) 2008-20
16
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -54,7 +54,7 @@ void testGaussian() {
System
system
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/opencl/tests/TestOpenCLSort.cpp
View file @
dca54ec7
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-20
09
Stanford University and the Authors. *
* Portions copyright (c) 2008-20
16
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenCLArray
data
(
context
,
array
.
size
(),
sizeof
(
float
),
"sortData"
);
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment