Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5a06df78
Commit
5a06df78
authored
Mar 04, 2020
by
tic20
Browse files
Merge
https://github.com/openmm/openmm
parents
8dd60914
a9223eea
Changes
335
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
234 additions
and
1182 deletions
+234
-1182
platforms/opencl/src/kernels/cmapTorsionForce.cl
platforms/opencl/src/kernels/cmapTorsionForce.cl
+0
-122
platforms/opencl/src/kernels/common.cl
platforms/opencl/src/kernels/common.cl
+61
-0
platforms/opencl/src/kernels/customCompoundBond.cl
platforms/opencl/src/kernels/customCompoundBond.cl
+0
-41
platforms/opencl/src/kernels/customExternalForce.cl
platforms/opencl/src/kernels/customExternalForce.cl
+0
-2
platforms/opencl/src/kernels/customGBChainRule.cl
platforms/opencl/src/kernels/customGBChainRule.cl
+0
-19
platforms/opencl/src/kernels/customGBGradientChainRule.cl
platforms/opencl/src/kernels/customGBGradientChainRule.cl
+0
-17
platforms/opencl/src/kernels/customIntegrator.cl
platforms/opencl/src/kernels/customIntegrator.cl
+0
-97
platforms/opencl/src/kernels/customIntegratorPerDof.cl
platforms/opencl/src/kernels/customIntegratorPerDof.cl
+0
-49
platforms/opencl/src/kernels/customNonbonded.cl
platforms/opencl/src/kernels/customNonbonded.cl
+0
-23
platforms/opencl/src/kernels/gayBerne.cl
platforms/opencl/src/kernels/gayBerne.cl
+0
-543
platforms/opencl/src/kernels/gbsaObc2.cl
platforms/opencl/src/kernels/gbsaObc2.cl
+0
-27
platforms/opencl/src/kernels/harmonicAngleForce.cl
platforms/opencl/src/kernels/harmonicAngleForce.cl
+0
-4
platforms/opencl/src/kernels/harmonicBondForce.cl
platforms/opencl/src/kernels/harmonicBondForce.cl
+0
-4
platforms/opencl/src/kernels/integrationUtilities.cl
platforms/opencl/src/kernels/integrationUtilities.cl
+0
-13
platforms/opencl/src/kernels/langevin.cl
platforms/opencl/src/kernels/langevin.cl
+0
-123
platforms/opencl/src/kernels/nonbondedExceptions.cl
platforms/opencl/src/kernels/nonbondedExceptions.cl
+3
-0
platforms/opencl/src/kernels/noseHooverChain.cl
platforms/opencl/src/kernels/noseHooverChain.cl
+170
-0
platforms/opencl/src/kernels/periodicTorsionForce.cl
platforms/opencl/src/kernels/periodicTorsionForce.cl
+0
-5
platforms/opencl/src/kernels/random.cl
platforms/opencl/src/kernels/random.cl
+0
-70
platforms/opencl/src/kernels/rbTorsionForce.cl
platforms/opencl/src/kernels/rbTorsionForce.cl
+0
-23
No files found.
platforms/opencl/src/kernels/cmapTorsionForce.cl
deleted
100644 → 0
View file @
8dd60914
// CMAP torsion fragment. It reads pos1..pos8, index, MAP_POS, MAPS and COEFF from the
// enclosing code, adds the interpolated energy to `energy`, and defines force1..force8.
const real PI = 3.14159265358979323846f;

// Compute the first angle: the angle between the normals cp0a and cp1a, signed by
// dot(v0a, cp1a) and then wrapped with fmod(angle+2*PI, 2*PI).

real4 v0a = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1a = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2a = (real4) (pos3.xyz-pos4.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0a)
APPLY_PERIODIC_TO_DELTA(v1a)
APPLY_PERIODIC_TO_DELTA(v2a)
#endif
real4 cp0a = cross(v0a, v1a);
real4 cp1a = cross(v1a, v2a);
real cosangle = dot(normalize(cp0a), normalize(cp1a));
real angleA;
if (cosangle > 0.99f || cosangle < -0.99f) {
    // We're close to the singularity in acos(), so take the cross product and use asin() instead.

    real4 cross_prod = cross(cp0a, cp1a);
    real scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
    angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
    if (cosangle < 0.0f)
        angleA = PI-angleA;
}
else {
    angleA = acos(cosangle);
}
if (!(dot(v0a, cp1a) >= 0))
    angleA = -angleA;
angleA = fmod(angleA+2.0f*PI, 2.0f*PI);

// Compute the second angle in exactly the same way from pos5..pos8.

real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0b)
APPLY_PERIODIC_TO_DELTA(v1b)
APPLY_PERIODIC_TO_DELTA(v2b)
#endif
real4 cp0b = cross(v0b, v1b);
real4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
real angleB;
if (cosangle > 0.99f || cosangle < -0.99f) {
    // We're close to the singularity in acos(), so take the cross product and use asin() instead.

    real4 cross_prod = cross(cp0b, cp1b);
    real scale = dot(cp0b, cp0b)*dot(cp1b, cp1b);
    angleB = asin(SQRT(dot(cross_prod, cross_prod)/scale));
    if (cosangle < 0.0f)
        angleB = PI-angleB;
}
else {
    angleB = acos(cosangle);
}
if (!(dot(v0b, cp1b) >= 0))
    angleB = -angleB;
angleB = fmod(angleB+2.0f*PI, 2.0f*PI);

// Identify which patch this is in. pos.x is the offset of this map's coefficients in COEFF
// and pos.y is the number of grid intervals per angle; each patch owns four float4 entries.

int2 pos = MAP_POS[MAPS[index]];
int size = pos.y;
real delta = 2*PI/size;
int s = (int) (angleA/delta);
int t = (int) (angleB/delta);
float4 c[4];
int coeffIndex = pos.x+4*(s+size*t);
for (int k = 0; k < 4; k++)
    c[k] = COEFF[coeffIndex+k];
// Fractional position inside the patch along each angle.
real da = angleA/delta-s;
real db = angleB/delta-t;

// Evaluate the spline to determine the energy and gradients. Each group of three statements
// is one step of a nested (Horner-style) polynomial evaluation, from c[3] down to c[0].

real torsionEnergy = 0.0f;
real dEdA = 0.0f;
real dEdB = 0.0f;

torsionEnergy = da*torsionEnergy + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;
dEdA = db*dEdA + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;
dEdB = da*dEdB + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;

torsionEnergy = da*torsionEnergy + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;
dEdA = db*dEdA + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;
dEdB = da*dEdB + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;

torsionEnergy = da*torsionEnergy + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;
dEdA = db*dEdA + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;
dEdB = da*dEdB + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;

torsionEnergy = da*torsionEnergy + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;
dEdA = db*dEdA + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;
dEdB = da*dEdB + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;

// The derivatives above are with respect to da/db; divide by the grid spacing to get
// derivatives with respect to the angles themselves.
dEdA /= delta;
dEdB /= delta;
energy += torsionEnergy;

// Apply the force to the first torsion.

real normCross1 = dot(cp0a, cp0a);
real normSqrBC = dot(v1a, v1a);
real normBC = SQRT(normSqrBC);
real normCross2 = dot(cp1a, cp1a);
real dp = 1.0f/normSqrBC;
real4 ff = (real4) ((-dEdA*normBC)/normCross1, dot(v0a, v1a)*dp, dot(v2a, v1a)*dp, (dEdA*normBC)/normCross2);
real4 force1 = ff.x*cp0a;
real4 force4 = ff.w*cp1a;
real4 d = ff.y*force1 - ff.z*force4;
real4 force2 = d-force1;
real4 force3 = -d-force4;

// Apply the force to the second torsion.

normCross1 = dot(cp0b, cp0b);
normSqrBC = dot(v1b, v1b);
normBC = SQRT(normSqrBC);
normCross2 = dot(cp1b, cp1b);
dp = 1.0f/normSqrBC;
ff = (real4) ((-dEdB*normBC)/normCross1, dot(v0b, v1b)*dp, dot(v2b, v1b)*dp, (dEdB*normBC)/normCross2);
real4 force5 = ff.x*cp0b;
real4 force8 = ff.w*cp1b;
d = ff.y*force5 - ff.z*force8;
real4 force6 = d-force5;
real4 force7 = -d-force8;
platforms/opencl/src/kernels/common.cl
0 → 100644
View file @
5a06df78
/**
 * This file contains OpenCL definitions for the macros and functions needed for the
 * common compute framework.
 */

// Atomic extensions. The 64 bit variant is only requested when the build defines
// SUPPORTS_64_BIT_ATOMICS.
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif

// Function and address space qualifiers. DEVICE deliberately expands to nothing.
#define KERNEL __kernel
#define DEVICE
#define LOCAL __local
#define LOCAL_ARG __local
#define GLOBAL __global
#define RESTRICT restrict

// Work-item and work-group indices along dimension 0.
#define LOCAL_ID get_local_id(0)
#define LOCAL_SIZE get_local_size(0)
#define GLOBAL_ID get_global_id(0)
#define GLOBAL_SIZE get_global_size(0)
#define GROUP_ID get_group_id(0)
#define NUM_GROUPS get_num_groups(0)

// Synchronization. Note that both expansions already end with a semicolon.
#define SYNC_THREADS barrier(CLK_LOCAL_MEM_FENCE+CLK_GLOBAL_MEM_FENCE);
#define MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE+CLK_GLOBAL_MEM_FENCE);
#define ATOMIC_ADD(dest, value) atom_add(dest, value)

// Integer type aliases.
typedef long mm_long;
typedef unsigned long mm_ulong;

// Vector constructors: make_T(...) expands to an OpenCL vector literal ((T) (...)).
#define make_short2(x...) ((short2) (x))
#define make_short3(x...) ((short3) (x))
#define make_short4(x...) ((short4) (x))
#define make_int2(x...) ((int2) (x))
#define make_int3(x...) ((int3) (x))
#define make_int4(x...) ((int4) (x))
#define make_float2(x...) ((float2) (x))
#define make_float3(x...) ((float3) (x))
#define make_float4(x...) ((float4) (x))
#define make_double2(x...) ((double2) (x))
#define make_double3(x...) ((double3) (x))
#define make_double4(x...) ((double4) (x))

// Select the .xyz part of a value.
#define trimTo3(v) (v).xyz
// OpenCL has overloaded versions of standard math functions for single and double
// precision arguments.  CUDA has separate functions.  To allow them to be called
// consistently, we define the "single precision" functions to just be synonyms
// for the standard ones.
//
// Every synonym must take the same number of arguments as the function it forwards to:
// a function-like macro invoked with the wrong argument count is rejected by the
// preprocessor, so powf() has to accept both the base and the exponent.

#define sqrtf(x) sqrt(x)
#define rsqrtf(x) rsqrt(x)
#define expf(x) exp(x)
#define logf(x) log(x)
#define powf(x, y) pow(x, y)
#define cosf(x) cos(x)
#define sinf(x) sin(x)
#define tanf(x) tan(x)
#define acosf(x) acos(x)
#define asinf(x) asin(x)
#define atanf(x) atan(x)
#define atan2f(x, y) atan2(x, y)
platforms/opencl/src/kernels/customCompoundBond.cl
deleted
100644 → 0
View file @
8dd60914
/**
 * Compute vec1-vec2 and store the squared length of the (x, y, z) part in the w component.
 *
 * When `periodic` is true the difference is first passed through APPLY_PERIODIC_TO_DELTA.
 * The periodic box parameters are not used directly here; presumably the macro refers to
 * them by name (its definition is outside this file - confirm).
 */
real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
    real4 diff = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
    // NOTE(review): this relies on the macro expanding to a single statement or block.
    if (periodic)
        APPLY_PERIODIC_TO_DELTA(diff);
    diff.w = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
    return diff;
}
/**
 * Compute the angle between two vectors.  The w component of each vector must already hold
 * its squared magnitude (as produced by ccb_delta() or ccb_computeCross()).
 */
real ccb_computeAngle(real4 vec1, real4 vec2) {
    real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
    real cosine = dotProduct*RSQRT(vec1.w*vec2.w);

    // Away from +/-1 acos() is well conditioned, so use it directly.  The test is written
    // as a negation so that a NaN cosine still takes this path.
    if (!(cosine > 0.99f || cosine < -0.99f))
        return acos(cosine);

    // We're close to the singularity in acos(), so take the cross product and use asin() instead.
    real4 crossProduct = cross(vec1, vec2);
    real scale = vec1.w*vec2.w;
    real angle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
    if (cosine < 0)
        angle = M_PI-angle;
    return angle;
}
/**
 * Compute the cross product of two vectors and store the squared length of its
 * (x, y, z) part in the w component.
 */
real4 ccb_computeCross(real4 vec1, real4 vec2) {
    real4 product = cross(vec1, vec2);
    product.w = product.x*product.x + product.y*product.y + product.z*product.z;
    return product;
}
platforms/opencl/src/kernels/customExternalForce.cl
deleted
100644 → 0
View file @
8dd60914
// COMPUTE_FORCE is substituted by the host; it is expected to define dEdX, dEdY and dEdZ
// (its expansion is not visible here).  The force is the negated gradient.
COMPUTE_FORCE
real4 force1 = (real4) (-dEdX, -dEdY, -dEdZ, 0);
platforms/opencl/src/kernels/customGBChainRule.cl
deleted
100644 → 0
View file @
8dd60914
// Pair interaction fragment.  Only distinct, real atoms interact; with USE_CUTOFF the pair
// must also lie inside the cutoff.
#ifdef USE_CUTOFF
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#endif
    // Accumulators written by COMPUTE_FORCE: one scalar in the symmetric case,
    // one vector per atom otherwise.
#ifdef USE_SYMMETRIC
    real tempForce = 0.0f;
#else
    real4 tempForce1 = (real4) 0;
    real4 tempForce2 = (real4) 0;
#endif
    COMPUTE_FORCE
    // Fold the result into the running totals owned by the enclosing code.
#ifdef USE_SYMMETRIC
    dEdR += tempForce*invR;
#else
    dEdR1 += tempForce1;
    dEdR2 += tempForce2;
#endif
}
platforms/opencl/src/kernels/customGBGradientChainRule.cl
deleted
100644 → 0
View file @
8dd60914
/**
 * Compute chain rule terms for computed values that depend explicitly on particle coordinates.
 *
 * Each work-item walks the atoms starting at its global id and stepping by the global size.
 * For every atom it loads the position and the current force, lets COMPUTE_FORCES update
 * `force`, and writes the force back.
 */
__kernel void computeGradientChainRuleTerms(__global real4* restrict forceBuffers, __global const real4* restrict posq
        PARAMETER_ARGUMENTS) {
    INIT_PARAM_DERIVS
    // `index` stays at function scope because the substituted macros are opaque here.
    unsigned int index = get_global_id(0);
    while (index < NUM_ATOMS) {
        real4 pos = posq[index];
        real4 force = forceBuffers[index];
        COMPUTE_FORCES
        forceBuffers[index] = force;
        index += get_global_size(0);
    }
    SAVE_PARAM_DERIVS
}
platforms/opencl/src/kernels/customIntegrator.cl
deleted
100644 → 0
View file @
8dd60914
/**
 * Sum the first bufferSize floats of sumBuffer and store the total in *result.
 *
 * Every work-item accumulates a strided slice of the buffer, then the partial sums are
 * combined with a tree reduction in local memory and work-item 0 writes the total.
 * NOTE(review): only local ids are used, so this presumably expects to be launched as a
 * single work-group of WORK_GROUP_SIZE work-items - confirm against the host code.
 */
__kernel void computeFloatSum(__global const float* restrict sumBuffer, __global float* result, int bufferSize) {
    __local float partial[WORK_GROUP_SIZE];
    const unsigned int localId = get_local_id(0);

    // Strided accumulation over the input.
    float accum = 0;
    unsigned int cursor = localId;
    while (cursor < bufferSize) {
        accum += sumBuffer[cursor];
        cursor += get_local_size(0);
    }
    partial[localId] = accum;

    // Pairwise reduction; the barrier at the top of each pass publishes the previous writes.
    for (int stride = 1; stride < WORK_GROUP_SIZE; stride *= 2) {
        barrier(CLK_LOCAL_MEM_FENCE);
        if (localId%(stride*2) == 0 && localId+stride < WORK_GROUP_SIZE)
            partial[localId] += partial[localId+stride];
    }
    if (localId == 0)
        *result = partial[0];
}
#ifdef SUPPORTS_DOUBLE_PRECISION
/**
 * Double precision counterpart of computeFloatSum(): sum the first bufferSize doubles of
 * sumBuffer and store the total in *result.  Only compiled when SUPPORTS_DOUBLE_PRECISION
 * is defined.
 */
__kernel void computeDoubleSum(__global const double* restrict sumBuffer, __global double* result, int bufferSize) {
    __local double partial[WORK_GROUP_SIZE];
    const unsigned int localId = get_local_id(0);

    // Strided accumulation over the input.
    double accum = 0;
    unsigned int cursor = localId;
    while (cursor < bufferSize) {
        accum += sumBuffer[cursor];
        cursor += get_local_size(0);
    }
    partial[localId] = accum;

    // Pairwise reduction; the barrier at the top of each pass publishes the previous writes.
    for (int stride = 1; stride < WORK_GROUP_SIZE; stride *= 2) {
        barrier(CLK_LOCAL_MEM_FENCE);
        if (localId%(stride*2) == 0 && localId+stride < WORK_GROUP_SIZE)
            partial[localId] += partial[localId+stride];
    }
    if (localId == 0)
        *result = partial[0];
}
#endif
/**
 * Add posDelta to the stored positions and reset posDelta to zero.
 *
 * With USE_MIXED_PRECISION a position is kept as posq plus posqCorrection: the two are
 * summed in `mixed` precision, the delta is applied, and the result is split back into a
 * `real` value and the remainder that the conversion to `real` dropped.
 */
__kernel void applyPositionDeltas(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta) {
    for (unsigned int atom = get_global_id(0); atom < NUM_ATOMS; atom += get_global_size(0)) {
#ifdef USE_MIXED_PRECISION
        real4 stored = posq[atom];
        real4 correction = posqCorrection[atom];
        mixed4 updated = (mixed4) (stored.x+(mixed)correction.x, stored.y+(mixed)correction.y, stored.z+(mixed)correction.z, stored.w);
#else
        real4 updated = posq[atom];
#endif
        updated.xyz += posDelta[atom].xyz;
#ifdef USE_MIXED_PRECISION
        posq[atom] = (real4) ((real) updated.x, (real) updated.y, (real) updated.z, (real) updated.w);
        posqCorrection[atom] = (real4) (updated.x-(real) updated.x, updated.y-(real) updated.y, updated.z-(real) updated.z, 0);
#else
        posq[atom] = updated;
#endif
        posDelta[atom] = (mixed4) 0;
    }
}
/**
 * Fill random[0..numValues) with uniform random numbers, three per float4 (w is 0).
 *
 * Each work-item loads its own generator state from seed[global id], advances it three
 * times per output element, and stores the state back at the end.  Each draw is
 * max(state.x+state.y+state.w, 1) divided by 0xffffffff, so it is never exactly zero.
 */
__kernel void generateRandomNumbers(int numValues, __global float4* restrict random, __global uint4* restrict seed) {
    uint4 state = seed[get_global_id(0)];
    unsigned int carry = 0;
    for (int index = get_global_id(0); index < numValues; index += get_global_size(0)) {
        // Generate three uniform random numbers.

        float draws[3];
        for (int n = 0; n < 3; n++) {
            // state.x: linear congruential update.
            state.x = state.x * 69069 + 1;
            // state.y: shift/xor update.
            state.y ^= state.y << 13;
            state.y ^= state.y >> 17;
            state.y ^= state.y << 5;
            // state.z / state.w: additive update with a carry bit.
            unsigned int k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
            unsigned int m = state.w + state.w + state.z + carry;
            state.z = state.w;
            state.w = m;
            carry = k >> 30;
            draws[n] = (float) max(state.x + state.y + state.w, 0x00000001u) / (float) 0xffffffff;
        }

        // Record the values.

        random[index] = (float4) (draws[0], draws[1], draws[2], 0.0f);
    }
    seed[get_global_id(0)] = state;
}
platforms/opencl/src/kernels/customIntegratorPerDof.cl
deleted
100644 → 0
View file @
8dd60914
/**
 * Load the position of a particle.
 *
 * With USE_MIXED_PRECISION the x, y and z components are the sum of posq and
 * posqCorrection evaluated in `mixed` precision; w is taken from posq unchanged.
 * Otherwise posq[index] is returned as is.
 */
mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
#ifdef USE_MIXED_PRECISION
    real4 stored = posq[index];
    real4 correction = posqCorrection[index];
    return (mixed4) (stored.x+(mixed)correction.x, stored.y+(mixed)correction.y, stored.z+(mixed)correction.z, stored.w);
#else
    return posq[index];
#endif
}
/**
 * Store the position of a particle.
 *
 * With USE_MIXED_PRECISION posq receives the position converted to `real`, and
 * posqCorrection receives what that conversion dropped (its w component is set to 0).
 * Otherwise the position is written straight to posq.
 */
void storePos(__global real4* restrict posq, __global real4* restrict posqCorrection, int index, mixed4 pos) {
#ifdef USE_MIXED_PRECISION
    posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
    posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
    posq[index] = pos;
#endif
}
/**
 * Run one per-degree-of-freedom step of a custom integrator.
 *
 * For every atom the kernel loads the position (plus posDelta when LOAD_POS_AS_DELTA is
 * defined), the velocity and the force, and then executes COMPUTE_STEP, which the host
 * substitutes.  Atoms whose velocity.w is zero are skipped.  The local names
 * (stepSize, position, velocity, f, mass, gaussianIndex, uniformIndex, index) are kept
 * unchanged because the substituted code presumably refers to them.
 */
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
        __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
        __global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues,
        const mixed energy, __global mixed* restrict energyParamDerivs
        PARAMETER_ARGUMENTS) {
    mixed stepSize = dt[0].y;
    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
        mixed4 position = loadPos(posq, posqCorrection, index);
#ifdef LOAD_POS_AS_DELTA
        position = position+posDelta[index];
#endif
        mixed4 velocity = velm[index];
        mixed4 f = convert_mixed4(force[index]);
        // velocity.w is used as the inverse mass; the division happens before the zero test,
        // exactly as in the original ordering.
        mixed mass = 1/velocity.w;
        if (velocity.w != 0.0) {
            int gaussianIndex = gaussianBaseIndex;
            int uniformIndex = 0;
            COMPUTE_STEP
        }
    }
}
platforms/opencl/src/kernels/customNonbonded.cl
deleted
100644 → 0
View file @
8dd60914
// Pair interaction fragment for a custom nonbonded force.  Excluded pairs are skipped;
// with USE_CUTOFF the pair must also lie inside the cutoff.
#ifdef USE_CUTOFF
if (!isExcluded && r2 < CUTOFF_SQUARED) {
#else
if (!isExcluded) {
#endif
    real tempForce = 0.0f;
    real switchValue = 1;
    real switchDeriv = 0;
#if USE_SWITCH
    // Beyond SWITCH_CUTOFF evaluate the switching polynomial and its derivative.
    if (r > SWITCH_CUTOFF) {
        real x = r-SWITCH_CUTOFF;
        switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
        switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
    }
#endif
    COMPUTE_FORCE
#if USE_SWITCH
    // Product rule: scale the force and subtract the energy times the switch derivative.
    tempForce = tempForce*switchValue - customEnergy*switchDeriv;
    tempEnergy += customEnergy*switchValue;
#else
    tempEnergy += customEnergy;
#endif
    dEdR += tempForce*invR;
}
platforms/opencl/src/kernels/gayBerne.cl
deleted
100644 → 0
View file @
8dd60914
// Atomic extensions used by the kernels in this file (atom_add on int and long).
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable

// Number of atoms per block of sorted atoms.
#define TILE_SIZE 32
// Number of neighbor slots per entry written by storeNeighbors().
#define NEIGHBOR_BLOCK_SIZE 32
/**
 * Calculate the ellipsoid coordinate frames and associated matrices.
 *
 * For each sorted particle this builds an orthonormal frame (xdir, ydir, zdir), stores it
 * row by row in the particle's 3x3 slice of aMatrix, and fills the matching slices of
 * bMatrix and gMatrix with sum_k a[k][i]*w[k]*a[k][j], where w comes from
 * scale[...].xyz (for b) and sigParams[...].yzw (for g).
 */
__kernel void computeEllipsoidFrames(int numParticles, __global const real4* restrict posq, __global int2* const restrict axisParticleIndices,
        __global const float4* restrict sigParams, __global const float4* restrict scale, __global real* restrict aMatrix,
        __global real* restrict bMatrix, __global real* restrict gMatrix, __global const int* sortedParticles) {
    for (int sortedIndex = get_global_id(0); sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
        // Compute the local coordinate system of the ellipsoid.

        int originalIndex = sortedParticles[sortedIndex];
        real3 pos = posq[originalIndex].xyz;
        int2 axisParticles = axisParticleIndices[originalIndex];
        real3 xdir, ydir, zdir;
        if (axisParticles.x == -1) {
            // No axis particles at all: use the global x and y axes.
            xdir = (real3) (1, 0, 0);
            ydir = (real3) (0, 1, 0);
        }
        else {
            xdir = normalize(pos-posq[axisParticles.x].xyz);
            if (axisParticles.y == -1) {
                // No second axis particle: pick whichever global axis is less aligned with xdir.
                if (xdir.y > -0.5f && xdir.y < 0.5f)
                    ydir = (real3) (0, 1, 0);
                else
                    ydir = (real3) (1, 0, 0);
            }
            else {
                ydir = pos-posq[axisParticles.y].xyz;
            }
            // Remove the component along xdir, then normalize.
            ydir -= xdir*dot(xdir, ydir);
            ydir = normalize(ydir);
        }
        zdir = cross(xdir, ydir);

        // Compute matrices we will need later.

        __global real (*a)[3] = (__global real (*)[3]) (aMatrix+sortedIndex*9);
        __global real (*b)[3] = (__global real (*)[3]) (bMatrix+sortedIndex*9);
        __global real (*g)[3] = (__global real (*)[3]) (gMatrix+sortedIndex*9);
        a[0][0] = xdir.x;
        a[0][1] = xdir.y;
        a[0][2] = xdir.z;
        a[1][0] = ydir.x;
        a[1][1] = ydir.y;
        a[1][2] = ydir.z;
        a[2][0] = zdir.x;
        a[2][1] = zdir.y;
        a[2][2] = zdir.z;
        float4 sig = sigParams[originalIndex];
        float3 gWeights = sig.yzw;
        float3 bWeights = scale[originalIndex].xyz;
        for (int row = 0; row < 3; row++) {
            for (int col = 0; col < 3; col++) {
                b[row][col] = a[0][row]*bWeights.x*a[0][col] + a[1][row]*bWeights.y*a[1][col] + a[2][row]*bWeights.z*a[2][col];
                g[row][col] = a[0][row]*gWeights.x*a[0][col] + a[1][row]*gWeights.y*a[1][col] + a[2][row]*gWeights.z*a[2][col];
            }
        }
    }
}
/**
 * Find a bounding box for the atoms in each block.
 *
 * Block k covers sorted atoms [k*TILE_SIZE, min((k+1)*TILE_SIZE, numAtoms)).  The kernel
 * also copies every atom's position into sortedPos in sorted order, writes the half
 * extent of each box to blockBoundingBox and its midpoint to blockCenter, and has
 * work-item 0 reset *neighborBlockCount to 0.
 */
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        __global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter,
        __global real4* restrict blockBoundingBox, __global int* restrict neighborBlockCount) {
    for (int block = get_global_id(0); block*TILE_SIZE < numAtoms; block += get_global_size(0)) {
        const int base = block*TILE_SIZE;

        // The first atom of the block seeds both corners of the box.
        real4 atomPos = posq[sortedAtoms[base]];
        sortedPos[base] = atomPos;
#ifdef USE_PERIODIC
        APPLY_PERIODIC_TO_POS(atomPos)
#endif
        real4 minPos = atomPos;
        real4 maxPos = atomPos;

        // Grow the box over the remaining atoms of the block.
        int last = min(base+TILE_SIZE, numAtoms);
        for (int atom = base+1; atom < last; atom++) {
            atomPos = posq[sortedAtoms[atom]];
            sortedPos[atom] = atomPos;
#ifdef USE_PERIODIC
            real4 center = 0.5f*(maxPos+minPos);
            APPLY_PERIODIC_TO_POS_WITH_CENTER(atomPos, center)
#endif
            minPos = min(minPos, atomPos);
            maxPos = max(maxPos, atomPos);
        }
        real4 halfExtent = 0.5f*(maxPos-minPos);
        blockBoundingBox[block] = halfExtent;
        blockCenter[block] = 0.5f*(maxPos+minPos);
    }
    if (get_global_id(0) == 0)
        *neighborBlockCount = 0;
}
/**
 * This is called by findNeighbors() to write a block to the neighbor list.
 *
 * A slot is reserved by atomically incrementing *neighborBlockCount.  If the slot index
 * is beyond maxNeighborBlocks nothing is written (the counter is still incremented).
 * Otherwise atom1 is recorded in neighborIndex and the slot's NEIGHBOR_BLOCK_SIZE
 * entries are filled with the buffered atoms followed by -1 padding.
 */
void storeNeighbors(int atom1, int* neighborBuffer, int numAtomsInBuffer, int maxNeighborBlocks, __global int* restrict neighbors, __global int* restrict neighborIndex, __global int* restrict neighborBlockCount) {
    int slot = atom_add(neighborBlockCount, 1);
    if (slot >= maxNeighborBlocks)
        return; // We don't have enough room for the neighbor list.
    neighborIndex[slot] = atom1;
    int firstEntry = slot*NEIGHBOR_BLOCK_SIZE;
    int entry;
    for (entry = 0; entry < numAtomsInBuffer; entry++)
        neighbors[firstEntry+entry] = neighborBuffer[entry];
    for (entry = numAtomsInBuffer; entry < NEIGHBOR_BLOCK_SIZE; entry++)
        neighbors[firstEntry+entry] = -1;
}
/**
 * Build a list of neighbors for each atom.
 *
 * Each work-item takes atoms atom1 = global id, global id + global size, ...  For every
 * atom it scans the blocks that can contain atoms with a higher sorted index, skips
 * blocks whose bounding box is at least the cutoff away, and collects the atoms within
 * the cutoff into a private buffer that is flushed through storeNeighbors() whenever it
 * fills up and once more at the end.
 */
__kernel void findNeighbors(int numAtoms, int maxNeighborBlocks, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        __global real4* restrict sortedPos, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict neighbors,
        __global int* restrict neighborIndex, __global int* restrict neighborBlockCount, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex) {
    const int numBlocks = (numAtoms+TILE_SIZE-1)/TILE_SIZE;
    int neighborBuffer[NEIGHBOR_BLOCK_SIZE];
    for (int atom1 = get_global_id(0); atom1 < numAtoms; atom1 += get_global_size(0)) {
        int nextExclusion = exclusionStartIndex[atom1];
        int lastExclusion = exclusionStartIndex[atom1+1];
        real4 pos1 = sortedPos[atom1];
        int buffered = 0;

        // Loop over atom blocks and compute the distance of this atom from each one's bounding box.

        for (int block = (atom1+1)/TILE_SIZE; block < numBlocks; block++) {
            real4 center = blockCenter[block];
            real4 halfExtent = blockBoundingBox[block];
            real4 gap = center-pos1;
#ifdef USE_PERIODIC
            APPLY_PERIODIC_TO_DELTA(gap)
#endif
            gap.x = max((real) 0, fabs(gap.x)-halfExtent.x);
            gap.y = max((real) 0, fabs(gap.y)-halfExtent.y);
            gap.z = max((real) 0, fabs(gap.z)-halfExtent.z);
            if (gap.x*gap.x+gap.y*gap.y+gap.z*gap.z >= CUTOFF_SQUARED)
                continue;

            // Loop over atoms within this block.

            int first = max(block*TILE_SIZE, atom1+1);
            int last = min((block+1)*TILE_SIZE, numAtoms);
            for (int atom2 = first; atom2 < last; atom2++) {
                // Skip over excluded interactions.

                if (nextExclusion < lastExclusion && exclusions[nextExclusion] >= atom2) {
                    nextExclusion++;
                    continue;
                }
                real4 delta = pos1-sortedPos[atom2];
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                // Written as a negation so that a NaN distance is skipped, as before.
                if (!(r2 < CUTOFF_SQUARED))
                    continue;
                neighborBuffer[buffered++] = atom2;
                if (buffered == NEIGHBOR_BLOCK_SIZE) {
                    // The buffer is full: flush it and start a new one.
                    storeNeighbors(atom1, neighborBuffer, buffered, maxNeighborBlocks, neighbors, neighborIndex, neighborBlockCount);
                    buffered = 0;
                }
            }
        }
        // Flush whatever is left for this atom.
        if (buffered > 0)
            storeNeighbors(atom1, neighborBuffer, buffered, maxNeighborBlocks, neighbors, neighborIndex, neighborBlockCount);
    }
}
// Per-atom data gathered into private memory for one interaction.
typedef struct {
    float4 sig;     // one float4 entry of the sigma parameter array
    float2 eps;     // one float2 entry of the epsilon parameter array
    real3 pos;      // atom position
    real a[3][3];   // 3x3 matrices, stored row-major
    real b[3][3];
    real g[3][3];
} AtomData;
/**
 * Fill `data` for one atom.
 *
 * sigParams and epsParams are indexed by originalIndex; pos and the three matrix arrays
 * are indexed by sortedIndex, with each matrix occupying nine consecutive values.
 */
void loadAtomData(AtomData* data, int sortedIndex, int originalIndex, __global const real4* restrict pos, __global const float4* restrict sigParams,
        __global const float2* restrict epsParams, __global const real* restrict aMatrix, __global const real* restrict bMatrix, __global const real* restrict gMatrix) {
    data->sig = sigParams[originalIndex];
    data->eps = epsParams[originalIndex];
    data->pos = pos[sortedIndex].xyz;
    const int firstElement = 9*sortedIndex;
    for (int e = 0; e < 9; e++) {
        const int row = e/3;
        const int col = e%3;
        data->a[row][col] = aMatrix[firstElement+e];
        data->b[row][col] = bMatrix[firstElement+e];
        data->g[row][col] = gMatrix[firstElement+e];
    }
}
/**
 * Return m*v, treating v as a column vector: component i is the dot product of row i of
 * m with v.
 */
real3 matrixVectorProduct(real (*m)[3], real3 v) {
    const real rx = m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z;
    const real ry = m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z;
    const real rz = m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z;
    return (real3) (rx, ry, rz);
}
/**
 * Return v*m, treating v as a row vector: component j is the dot product of v with
 * column j of m.
 */
real3 vectorMatrixProduct(real3 v, real (*m)[3]) {
    const real rx = m[0][0]*v.x + m[1][0]*v.y + m[2][0]*v.z;
    const real ry = m[0][1]*v.x + m[1][1]*v.y + m[2][1]*v.z;
    const real rz = m[0][2]*v.x + m[1][2]*v.y + m[2][2]*v.z;
    return (real3) (rx, ry, rz);
}
/**
 * Store the element-wise sum of the 3x3 matrices a and b in result.
 */
void matrixSum(real (*result)[3], real (*a)[3], real (*b)[3]) {
    for (int row = 0; row < 3; row++)
        for (int col = 0; col < 3; col++)
            result[row][col] = a[row][col]+b[row][col];
}
/**
 * Return the determinant of the 3x3 matrix m: the three positive products of the
 * expansion followed by the three negative ones.
 */
real determinant(real (*m)[3]) {
    real det = m[0][0]*m[1][1]*m[2][2] + m[0][1]*m[1][2]*m[2][0] + m[0][2]*m[1][0]*m[2][1] -
               m[0][0]*m[1][2]*m[2][1] - m[0][1]*m[1][0]*m[2][2] - m[0][2]*m[1][1]*m[2][0];
    return det;
}
/**
 * Store the inverse of the 3x3 matrix m in result, computed from signed 2x2 minors scaled
 * by the reciprocal of the determinant.  There is no check for a singular matrix.
 * The statements are kept in their original order.
 */
void matrixInverse(real (*result)[3], real (*m)[3]) {
    real detRecip = RECIP(determinant(m));

    // First column of the result.
    result[0][0] = detRecip*(m[1][1]*m[2][2] - m[1][2]*m[2][1]);
    result[1][0] = -detRecip*(m[1][0]*m[2][2] - m[1][2]*m[2][0]);
    result[2][0] = detRecip*(m[1][0]*m[2][1] - m[1][1]*m[2][0]);

    // Second column.
    result[0][1] = -detRecip*(m[0][1]*m[2][2] - m[0][2]*m[2][1]);
    result[1][1] = detRecip*(m[0][0]*m[2][2] - m[0][2]*m[2][0]);
    result[2][1] = -detRecip*(m[0][0]*m[2][1] - m[0][1]*m[2][0]);

    // Third column.
    result[0][2] = detRecip*(m[0][1]*m[1][2] - m[0][2]*m[1][1]);
    result[1][2] = -detRecip*(m[0][0]*m[1][2] - m[0][2]*m[1][0]);
    result[2][2] = detRecip*(m[0][0]*m[1][1] - m[0][1]*m[1][0]);
}
/**
 * Compute the interaction between two ellipsoids.
 *
 * data1/data2 hold the per-atom parameters and matrices, sigma and epsilon are the
 * combined parameters for the pair, dr is the displacement between the atoms and r2 its
 * squared length.  The force is added to *force1 and subtracted from *force2, the
 * per-atom torques are subtracted from *torque1 and *torque2, and the (switched) energy
 * is added to *totalEnergy.
 */
void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real epsilon, real3 dr, real r2,
        real3* force1, real3* force2, real3* torque1, real3* torque2, mixed *totalEnergy) {
    real rInv = RSQRT(r2);
    real r = r2*rInv;
    real3 drUnit = dr*rInv;

    // Compute the switching function.

    real switchValue = 1;
    real switchDeriv = 0;
#if USE_SWITCH
    if (r > SWITCH_CUTOFF) {
        real x = r-SWITCH_CUTOFF;
        switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
        switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
    }
#endif

    // Compute vectors and matrices we'll be needing.

    real B12[3][3], G12[3][3], B12inv[3][3], G12inv[3][3];
    matrixSum(B12, data1->b, data2->b);
    matrixSum(G12, data1->g, data2->g);
    matrixInverse(B12inv, B12);
    matrixInverse(G12inv, G12);
    real detG12 = determinant(G12);

    // Estimate the distance between the ellipsoids and compute the first terms needed for the energy.

    real sigma12 = 1/SQRT(0.5f*dot(drUnit, matrixVectorProduct(G12inv, drUnit)));
    real h12 = r - sigma12;
    real rho = sigma/(h12+sigma);
    real rho2 = rho*rho;
    real rho6 = rho2*rho2*rho2;
    real u = 4*epsilon*(rho6*rho6-rho6);
    real eta = SQRT(2*data1->eps.y*data2->eps.y/detG12);
    real chi = 2*dot(drUnit, matrixVectorProduct(B12inv, drUnit));
    chi *= chi;
    real energy = u*eta*chi;

    // Compute the terms needed for the force.

    real3 kappa = matrixVectorProduct(G12inv, dr);
    real3 iota = matrixVectorProduct(B12inv, dr);
    real rInv2 = rInv*rInv;
    real dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma;
    real temp = 0.5f*sigma12*sigma12*sigma12*rInv2;
    real3 dudr = (drUnit + (kappa-drUnit*dot(kappa, drUnit))*temp)*dUSLJdr;
    real3 dchidr = (iota-drUnit*dot(iota, drUnit))*(-8*rInv2*SQRT(chi));
    real3 force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv);
    *force1 += force;
    *force2 -= force;

    // Compute the terms needed for the torque, first for atom 1 and then for atom 2.

    for (int which = 0; which < 2; which++) {
        AtomData* atom = (which == 0 ? data1 : data2);
        real3* torqueOut = (which == 0 ? torque1 : torque2);
        real (*a)[3] = atom->a;
        real (*b)[3] = atom->b;
        real (*g)[3] = atom->g;
        float4 sig = atom->sig;
        real3 dudq = cross(vectorMatrixProduct(kappa, g), kappa*(temp*dUSLJdr));
        real3 dchidq = cross(vectorMatrixProduct(iota, b), iota)*(-4*rInv2);
        real3 scale = (real3) (sig.y, sig.z, sig.w)*(-0.5f*eta/detG12);

        // Row i of d is built from row i of a and products of G12 elements, scaled by
        // component i of `scale`.
        real d[3][3];
        d[0][0] = scale.x*(2*a[0][0]*(G12[1][1]*G12[2][2] - G12[1][2]*G12[2][1]) +
                             a[0][2]*(G12[1][2]*G12[0][1] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])) +
                             a[0][1]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])));
        d[0][1] = scale.x*(  a[0][0]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])) +
                           2*a[0][1]*(G12[0][0]*G12[2][2] - G12[2][0]*G12[0][2]) +
                             a[0][2]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])));
        d[0][2] = scale.x*(  a[0][0]*(G12[0][1]*G12[1][2] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])) +
                             a[0][1]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])) +
                           2*a[0][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1]));
        d[1][0] = scale.y*(2*a[1][0]*(G12[1][1]*G12[2][2] - G12[1][2]*G12[2][1]) +
                             a[1][1]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])) +
                             a[1][2]*(G12[1][2]*G12[0][1] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])));
        d[1][1] = scale.y*(  a[1][0]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])) +
                           2*a[1][1]*(G12[2][2]*G12[0][0] - G12[2][0]*G12[0][2]) +
                             a[1][2]*(G12[1][0]*G12[0][2] + G12[0][1]*G12[2][0] - G12[0][0]*(G12[1][2] + G12[2][1])));
        d[1][2] = scale.y*(  a[1][0]*(G12[0][1]*G12[1][2] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])) +
                             a[1][1]*(G12[1][0]*G12[0][2] + G12[0][1]*G12[2][0] - G12[0][0]*(G12[1][2] + G12[2][1])) +
                           2*a[1][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1]));
        d[2][0] = scale.z*(2*a[2][0]*(G12[1][1]*G12[2][2] - G12[2][1]*G12[1][2]) +
                             a[2][1]*(G12[0][2]*G12[2][1] + G12[1][2]*G12[2][0] - G12[2][2]*(G12[0][1] + G12[1][0])) +
                             a[2][2]*(G12[0][1]*G12[1][2] + G12[2][1]*G12[1][0] - G12[1][1]*(G12[0][2] + G12[2][0])));
        d[2][1] = scale.z*(  a[2][0]*(G12[0][2]*G12[2][1] + G12[1][2]*G12[2][0] - G12[2][2]*(G12[0][1] + G12[1][0])) +
                           2*a[2][1]*(G12[0][0]*G12[2][2] - G12[0][2]*G12[2][0]) +
                             a[2][2]*(G12[1][0]*G12[0][2] + G12[0][1]*G12[2][0] - G12[0][0]*(G12[1][2] + G12[2][1])));
        d[2][2] = scale.z*(  a[2][0]*(G12[0][1]*G12[1][2] + G12[2][1]*G12[1][0] - G12[1][1]*(G12[0][2] + G12[2][0])) +
                             a[2][1]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])) +
                           2*a[2][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1]));

        // Sum the cross products of matching rows of a and d.
        real3 detadq = 0;
        for (int i = 0; i < 3; i++)
            detadq += cross((real3) (a[i][0], a[i][1], a[i][2]), (real3) (d[i][0], d[i][1], d[i][2]));
        real3 torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
        *torqueOut -= torque;
    }
    *totalEnergy += switchValue*energy;
}
/**
 * Compute the interactions.
 *
 * Every pair is evaluated by computeOneInteraction(), which accumulates into the
 * per-atom force/torque vectors and into the running energy.  The results are
 * added to forceBuffers and torqueBuffers with atom_add() as 64 bit fixed point
 * numbers (value*0x100000000).  In both buffers the x component of an atom is
 * stored at [index], y at [index+PADDED_NUM_ATOMS] and z at
 * [index+2*PADDED_NUM_ATOMS].
 *
 * atom1/atom2 are positions in sortedAtoms; index1/index2 are the values read
 * from sortedAtoms and are the indices used to address the output buffers.
 *
 * - With USE_CUTOFF, pairs come from the neighbor list: neighborIndex[block] is
 *   atom1 and neighbors[] holds NEIGHBOR_BLOCK_SIZE candidates per block, with
 *   -1 marking an unused slot.  If *neighborBlockCount exceeds
 *   maxNeighborBlocks the kernel returns immediately without writing anything.
 * - Without USE_CUTOFF, every pair atom2 > atom1 is evaluated except those listed
 *   in exclusions[exclusionStartIndex[atom1] .. exclusionStartIndex[atom1+1]).
 *   The skip logic only advances on an exact match, so it presumably relies on
 *   each atom's exclusions being sorted in increasing order (TODO confirm).
 *
 * Afterwards the exceptions are evaluated; exceptionParams supplies the two
 * values passed as sigma and epsilon, and exceptionParticles holds
 * (index1, index2, atom1, atom2).
 *
 * The energy accumulated by this work-item is added to
 * energyBuffer[get_global_id(0)].
 */
__kernel void computeForce(
        __global long* restrict forceBuffers, __global long* restrict torqueBuffers,
        int numAtoms, int numExceptions, __global mixed* restrict energyBuffer, __global const real4* restrict pos,
        __global const float4* restrict sigParams, __global const float2* restrict epsParams,
        __global const int* restrict sortedAtoms, __global const real* restrict aMatrix,
        __global const real* restrict bMatrix, __global const real* restrict gMatrix,
        __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex,
        __global const int4* restrict exceptionParticles, __global const float2* restrict exceptionParams
#ifdef USE_CUTOFF
        , int maxNeighborBlocks, __global int* restrict neighbors, __global int* restrict neighborIndex,
        __global int* restrict neighborBlockCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#endif
        ) {
    mixed energy = 0;
#ifdef USE_CUTOFF
    const int numBlocks = *neighborBlockCount;
    if (numBlocks > maxNeighborBlocks)
        return; // There wasn't enough memory for the neighbor list.
    for (int block = get_global_id(0); block < numBlocks; block += get_global_size(0)) {
        // Load parameters for atom1.

        int atom1 = neighborIndex[block];
        int index1 = sortedAtoms[atom1];
        AtomData data1;
        loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        real3 force1 = 0.0f;
        real3 torque1 = 0.0f;
        for (int indexInBlock = 0; indexInBlock < NEIGHBOR_BLOCK_SIZE; indexInBlock++) {
            // Load parameters for atom2.

            int atom2 = neighbors[NEIGHBOR_BLOCK_SIZE*block+indexInBlock];
            if (atom2 == -1)
                continue; // Unused slot in this neighbor block.
            int index2 = sortedAtoms[atom2];
            AtomData data2;
            loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
            real3 force2 = 0.0f;
            real3 torque2 = 0.0f;

            // Compute the interaction.

            real3 delta = data1.pos-data2.pos;
#ifdef USE_PERIODIC
            APPLY_PERIODIC_TO_DELTA(delta)
#endif
            real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
            real sigma = data1.sig.x+data2.sig.x;
            real epsilon = data1.eps.x*data2.eps.x;
            computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);

            // atom2 changes every iteration, so its contribution is flushed immediately.

            atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
            atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
            atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
            atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
            atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
            atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
        }

        // atom1 is fixed for the whole block, so its sum is written once.

        atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
        atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
        atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
        atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
        atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
        atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
    }
#else
    for (int atom1 = get_global_id(0); atom1 < numAtoms; atom1 += get_global_size(0)) {
        // Load parameters for atom1.

        int index1 = sortedAtoms[atom1];
        AtomData data1;
        loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        real3 force1 = 0.0f;
        real3 torque1 = 0.0f;
        int nextExclusion = exclusionStartIndex[atom1];
        int lastExclusion = exclusionStartIndex[atom1+1];
        for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) {
            // Skip over excluded interactions.

            if (nextExclusion < lastExclusion && exclusions[nextExclusion] == atom2) {
                nextExclusion++;
                continue;
            }

            // Load parameters for atom2.

            int index2 = sortedAtoms[atom2];
            AtomData data2;
            loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
            real3 force2 = 0.0f;
            real3 torque2 = 0.0f;

            // Compute the interaction.

            real3 delta = data1.pos-data2.pos;
            real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
            real sigma = data1.sig.x+data2.sig.x;
            real epsilon = data1.eps.x*data2.eps.x;
            computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
            atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
            atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
            atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
            atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
            atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
            atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
        }
        atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
        atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
        atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
        atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
        atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
        atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
    }
#endif

    // Now compute exceptions.
    // NOTE(review): unlike the neighbor list path above, delta is not passed through
    // APPLY_PERIODIC_TO_DELTA here even when USE_PERIODIC is defined - confirm that
    // this is intended.

    for (int index = get_global_id(0); index < numExceptions; index += get_global_size(0)) {
        int4 atomIndices = exceptionParticles[index];
        float2 params = exceptionParams[index];
        int index1 = atomIndices.x, index2 = atomIndices.y;
        int atom1 = atomIndices.z, atom2 = atomIndices.w;
        AtomData data1, data2;
        loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        real3 force1 = 0, force2 = 0;
        real3 torque1 = 0, torque2 = 0;
        real3 delta = data1.pos-data2.pos;
        real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
        if (r2 < CUTOFF_SQUARED) {
#endif
        computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
        atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
        atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
        atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
        atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
        atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
        atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
        atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
        atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
        atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
        atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
        atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
        atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#ifdef USE_CUTOFF
        }
#endif
    }
    energyBuffer[get_global_id(0)] += energy;
}
/**
 * Convert the torques to forces on the connected particles.
 *
 * For every particle whose first axis particle (axisParticleIndices[].x) is not
 * -1, the torque stored in torqueBuffers is read back from 64 bit fixed point
 * (divided by 0x100000000) and replaced by forces:
 *
 *  - the x axis particle receives cross(torque, dx)/|dx|^2, where dx points from
 *    the particle to its x axis particle;
 *  - if a y axis particle exists (axisParticleIndices[].y != -1), it receives
 *    cross(torque2, dy)/|dy|^2, where torque2 is the projection of the torque
 *    onto dx;
 *  - the particle itself receives the negative of both, so the added forces sum
 *    to zero.
 *
 * The forces are added to forceBuffers with atom_add() using the same fixed
 * point scale and the x / +PADDED_NUM_ATOMS / +2*PADDED_NUM_ATOMS component
 * layout used for the torques.
 */
__kernel void applyTorques(__global long* restrict forceBuffers, __global long* restrict torqueBuffers, int numParticles,
        __global const real4* restrict posq, __global int2* const restrict axisParticleIndices,
        __global const int* sortedParticles) {
    for (int sortedIndex = get_global_id(0); sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
        int originalIndex = sortedParticles[sortedIndex];
        real3 pos = posq[originalIndex].xyz;
        int2 axisParticles = axisParticleIndices[originalIndex];
        if (axisParticles.x != -1) {
            // Load the torque.

            real scale = 1/(real) 0x100000000;
            real3 torque = (real3) (scale*torqueBuffers[originalIndex], scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS], scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]);
            real3 force = 0, xforce = 0, yforce = 0;

            // Apply a force to the x particle.

            real3 dx = posq[axisParticles.x].xyz-pos;
            real dx2 = dot(dx, dx);
            real3 f = cross(torque, dx)/dx2;
            xforce += f;
            force -= f;
            if (axisParticles.y != -1) {
                // Apply a force to the y particle.  This is based on the component of the torque
                // that was not already applied to the x particle.

                real3 dy = posq[axisParticles.y].xyz-pos;
                real dy2 = dot(dy, dy);
                real3 torque2 = dx*dot(torque, dx)/dx2;
                f = cross(torque2, dy)/dy2;
                yforce += f;
                force -= f;
            }
            atom_add(&forceBuffers[originalIndex], (long) (force.x*0x100000000));
            atom_add(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
            atom_add(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
            atom_add(&forceBuffers[axisParticles.x], (long) (xforce.x*0x100000000));
            atom_add(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], (long) (xforce.y*0x100000000));
            atom_add(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], (long) (xforce.z*0x100000000));
            if (axisParticles.y != -1) {
                atom_add(&forceBuffers[axisParticles.y], (long) (yforce.x*0x100000000));
                atom_add(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (long) (yforce.y*0x100000000));
                atom_add(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (long) (yforce.z*0x100000000));
            }
        }
    }
}
platforms/opencl/src/kernels/gbsaObc2.cl
deleted
100644 → 0
View file @
8dd60914
{
    // Code fragment: r, invR, r2, atom1, atom2, dEdR and the OBC_PARAMS1/2 and
    // BORN_FORCE1/2 macros are presumably supplied by the kernel this text is
    // inserted into (TODO confirm).  It adds one pair's contribution to dEdR.
    real quarterInvR2 = 0.25f*invR*invR;

    // Contribution driven by the second atom's scaled radius (OBC_PARAMS2.y).
    real sumJ = r+OBC_PARAMS2.y;
    real lowerJ = RECIP(max((real) OBC_PARAMS1.x, fabs(r-OBC_PARAMS2.y)));
    real upperJ = RECIP(sumJ);
    real lowerJ2 = lowerJ*lowerJ;
    real upperJ2 = upperJ*upperJ;
    real logRatioJ = LOG(upperJ*RECIP(lowerJ));
    real diffJ = (lowerJ2-upperJ2);
    real term1 = (0.5f*(0.25f+OBC_PARAMS2.y*OBC_PARAMS2.y*quarterInvR2)*diffJ + logRatioJ*quarterInvR2)*invR;

    // Mirror image of the above with the roles of the two atoms swapped.
    real sumI = r+OBC_PARAMS1.y;
    real lowerI = RECIP(max((real) OBC_PARAMS2.x, fabs(r-OBC_PARAMS1.y)));
    real upperI = RECIP(sumI);
    real lowerI2 = lowerI*lowerI;
    real upperI2 = upperI*upperI;
    real logRatioI = LOG(upperI*RECIP(lowerI));
    real diffI = (lowerI2-upperI2);
    real term2 = (0.5f*(0.25f+OBC_PARAMS1.y*OBC_PARAMS1.y*quarterInvR2)*diffI + logRatioI*quarterInvR2)*invR;

    // Each term only counts when the radius in .x is smaller than r plus the
    // other atom's scaled radius.
    real partJ = (OBC_PARAMS1.x < sumJ ? BORN_FORCE1*term1 : (real) 0);
    real partI = (OBC_PARAMS2.x < sumI ? BORN_FORCE2*term2 : (real) 0);
    real pairdEdR = partJ+partI;

    // Ignore padding atoms, self interactions and (with a cutoff) distant pairs.
    bool usePair = (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2);
#ifdef USE_CUTOFF
    usePair = (usePair && r2 < CUTOFF_SQUARED);
#endif
    dEdR += (usePair ? pairdEdR : (real) 0);
}
platforms/opencl/src/kernels/harmonicAngleForce.cl
deleted
100644 → 0
View file @
8dd60914
// Harmonic angle term (code fragment; theta, index, energy and PARAMS are
// presumably provided by the kernel this is inserted into - TODO confirm).
// angleParams.x is subtracted from theta, angleParams.y multiplies the deviation.
float2 angleParams = PARAMS[index];
real deltaIdeal = theta-angleParams.x;

// Derivative of the energy with respect to the angle.
real dEdAngle = angleParams.y*deltaIdeal;

// E = 0.5*k*(theta-theta0)^2
energy += 0.5f*angleParams.y*deltaIdeal*deltaIdeal;
platforms/opencl/src/kernels/harmonicBondForce.cl
deleted
100644 → 0
View file @
8dd60914
// Harmonic bond term (code fragment; r, index, energy and PARAMS are presumably
// provided by the kernel this is inserted into - TODO confirm).
// bondParams.x is subtracted from r, bondParams.y multiplies the deviation.
float2 bondParams = PARAMS[index];
real deltaIdeal = r-bondParams.x;

// Derivative of the energy with respect to the distance.
real dEdR = bondParams.y*deltaIdeal;

// E = 0.5*k*(r-r0)^2
energy += 0.5f*bondParams.y*deltaIdeal*deltaIdeal;
platforms/opencl/src/kernels/integrationUtilities.cl
deleted
100644 → 0
View file @
8dd60914
/**
 * Apply a time shift to the velocities before computing kinetic energy.
 *
 * Each velocity whose w component is non-zero is advanced by
 * timeShift*force*w; entries with w == 0 are left untouched.
 */
__kernel void timeShiftVelocities(__global mixed4* restrict velm, __global const real4* restrict force, real timeShift) {
    const int stride = get_global_size(0);
    for (int atom = get_global_id(0); atom < NUM_ATOMS; atom += stride) {
        mixed4 v = velm[atom];
        if (velocity_is_fixed(v))
            continue;
        mixed4 atomForce = convert_mixed4(force[atom]);
        v.xyz += timeShift*atomForce.xyz*v.w;
        velm[atom] = v;
    }
}
\ No newline at end of file
platforms/opencl/src/kernels/langevin.cl
deleted
100644 → 0
View file @
8dd60914
// Slots of the Langevin parameter buffer; MaxParams is the number of slots.
enum {
    VelScale = 0,
    ForceScale = 1,
    NoiseScale = 2,
    MaxParams = 3
};
/**
 * Perform the first step of Langevin integration.
 *
 * For every atom whose velm.w is non-zero the velocity becomes
 *     vscale*v + fscale*w*force + noisescale*sqrt(w)*random
 * using the three coefficients read from paramBuffer, and posDelta is set to the
 * new velocity (all four components) multiplied by the step size dt[0].y.
 * Random values are read starting at randomIndex + global id and advance in
 * lock step with the atom index.
 */
__kernel void integrateLangevinPart1(__global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta,
        __global const mixed* restrict paramBuffer, __global const mixed2* restrict dt, __global const float4* restrict random, unsigned int randomIndex) {
    const mixed vscale = paramBuffer[VelScale];
    const mixed fscale = paramBuffer[ForceScale];
    const mixed noisescale = paramBuffer[NoiseScale];
    const mixed stepSize = dt[0].y;
    int index = get_global_id(0);
    randomIndex += index;
    for (; index < NUM_ATOMS; randomIndex += get_global_size(0), index += get_global_size(0)) {
        mixed4 velocity = velm[index];
        if (velocity.w == 0.0)
            continue; // Skipped entirely: neither velm nor posDelta is written.
        const mixed sqrtInvMass = sqrt(velocity.w);
        const mixed forceFactor = fscale*velocity.w;
        const mixed noiseFactor = noisescale*sqrtInvMass;
        velocity.x = vscale*velocity.x + forceFactor*force[index].x + noiseFactor*random[randomIndex].x;
        velocity.y = vscale*velocity.y + forceFactor*force[index].y + noiseFactor*random[randomIndex].y;
        velocity.z = vscale*velocity.z + forceFactor*force[index].z + noiseFactor*random[randomIndex].z;
        velm[index] = velocity;
        posDelta[index] = stepSize*velocity;
    }
}
/**
 * Perform the second step of Langevin integration.
 *
 * For every atom whose velm.w is non-zero, posDelta is added to the position and
 * the velocity is recomputed as posDelta divided by the step size dt[0].y.
 * Without double precision support the division is done as a multiplication by
 * the rounded reciprocal plus a correction term built from its rounding error.
 * With USE_MIXED_PRECISION the position is assembled from posq + posqCorrection
 * and split back into those two arrays afterwards.
 */
__kernel void integrateLangevinPart2(__global real4* restrict posq, __global real4* restrict posqCorrection, __global const mixed4* restrict posDelta,
        __global mixed4* restrict velm, __global const mixed2* restrict dt) {
#ifdef SUPPORTS_DOUBLE_PRECISION
    double invStepSize = 1.0/dt[0].y;
#else
    float invStepSize = 1.0f/dt[0].y;
    float correction = (1.0f-invStepSize*dt[0].y)/dt[0].y;
#endif
    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
        mixed4 vel = velm[index];
        if (vel.w == 0.0)
            continue;

        // Fetch the current position.
#ifdef USE_MIXED_PRECISION
        real4 pos1 = posq[index];
        real4 pos2 = posqCorrection[index];
        mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
        real4 pos = posq[index];
#endif

        // Move the atom and derive the velocity from the displacement.
        mixed4 delta = posDelta[index];
        pos.xyz += delta.xyz;
#ifdef SUPPORTS_DOUBLE_PRECISION
        vel.xyz = convert_mixed4(invStepSize*convert_double4(delta)).xyz;
#else
        vel.xyz = invStepSize*delta.xyz + correction*delta.xyz;
#endif

        // Store the results.
#ifdef USE_MIXED_PRECISION
        posq[index] = convert_real4(pos);
        posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
        posq[index] = pos;
#endif
        velm[index] = vel;
    }
}
/**
*
Select
the
step
size
to
use
for
the
next
step.
*/
__kernel
void
selectLangevinStepSize
(
mixed
maxStepSize,
mixed
errorTol,
mixed
friction,
mixed
kT,
__global
mixed2*
restrict
dt,
__global
const
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
mixed*
restrict
paramBuffer,
__local
mixed*
restrict
params,
__local
mixed*
restrict
error
)
{
//
Calculate
the
error.
mixed
err
=
0.0f
;
unsigned
int
index
=
get_local_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
real4
f
=
force[index]
;
mixed
invMass
=
velm[index].w
;
err
+=
(
f.x*f.x
+
f.y*f.y
+
f.z*f.z
)
*invMass*
invMass
;
index
+=
get_global_size
(
0
)
;
}
error[get_local_id
(
0
)
]
=
err
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Sum
the
errors
from
all
threads.
for
(
unsigned
int
offset
=
1
; offset < get_local_size(0); offset *= 2) {
if
(
get_local_id
(
0
)
+offset
<
get_local_size
(
0
)
&&
(
get_local_id
(
0
)
&
(
2*offset-1
))
==
0
)
error[get_local_id
(
0
)
]
+=
error[get_local_id
(
0
)
+offset]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
get_global_id
(
0
)
==
0
)
{
//
Select
the
new
step
size.
mixed
totalError
=
sqrt
(
error[0]/
(
NUM_ATOMS*3
))
;
mixed
newStepSize
=
sqrt
(
errorTol/totalError
)
;
mixed
oldStepSize
=
dt[0].y
;
if
(
oldStepSize
>
0.0f
)
newStepSize
=
min
(
newStepSize,
oldStepSize*2.0f
)
; // For safety, limit how quickly dt can increase.
if
(
newStepSize
>
oldStepSize
&&
newStepSize
<
1.1f*oldStepSize
)
newStepSize
=
oldStepSize
; // Keeping dt constant between steps improves the behavior of the integrator.
if
(
newStepSize
>
maxStepSize
)
newStepSize
=
maxStepSize
;
dt[0].y
=
newStepSize
;
//
Recalculate
the
integration
parameters.
mixed
vscale
=
exp
(
-newStepSize*friction
)
;
mixed
fscale
=
(
friction
==
0
?
newStepSize
:
(
1-vscale
)
/friction
)
;
mixed
noisescale
=
sqrt
(
kT*
(
1-vscale*vscale
))
;
params[VelScale]
=
vscale
;
params[ForceScale]
=
fscale
;
params[NoiseScale]
=
noisescale
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
MaxParams
)
paramBuffer[get_local_id
(
0
)
]
=
params[get_local_id
(
0
)
]
;
}
platforms/opencl/src/kernels/nonbondedExceptions.cl
View file @
5a06df78
float4
exceptionParams
=
PARAMS[index]
;
float4
exceptionParams
=
PARAMS[index]
;
real4
delta
=
pos2-pos1
;
real4
delta
=
pos2-pos1
;
#
if
APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
real
sig2
=
invR*exceptionParams.y
;
real
sig2
=
invR*exceptionParams.y
;
...
...
platforms/opencl/src/kernels/noseHooverChain.cl
0 → 100644
View file @
5a06df78
//#include
<initializer_list>
__kernel
void
propagateNoseHooverChain
(
__global
mixed2*
restrict
chainData,
__global
const
mixed2
*
restrict
energySum,
__global
mixed2*
restrict
scaleFactor,
__global
mixed*
restrict
chainMasses,
__global
mixed*
restrict
chainForces,
int
chainType,
int
chainLength,
int
numMTS,
int
numDOFs,
float
timeStep,
mixed
kT,
float
frequency
)
{
const
mixed
kineticEnergy
=
chainType
==
0
?
energySum[0].x
:
energySum[0].y
;
mixed
scale
=
1
;
if
(
kineticEnergy
<
1e-8
)
return
;
for
(
int
bead
=
0
; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
chainMasses[0]
*=
numDOFs
;
mixed
KE2
=
2.0f
*
kineticEnergy
;
mixed
timeOverMTS
=
timeStep
/
numMTS
;
chainForces[0]
=
(
KE2
-
numDOFs
*
kT
)
/
chainMasses[0]
;
for
(
int
bead
=
0
; bead < chainLength - 1; ++bead) {
chainForces[bead
+
1]
=
(
chainMasses[bead]
*
chainData[bead].y
*
chainData[bead].y
-
kT
)
/
chainMasses[bead
+
1]
;
}
for
(
int
mts
=
0
; mts < numMTS; ++mts) {
BEGIN_YS_LOOP
mixed
wdt
=
ys
*
timeOverMTS
;
chainData[chainLength-1].y
+=
0.25f
*
wdt
*
chainForces[chainLength-1]
;
for
(
int
bead
=
chainLength
-
2
; bead >= 0; --bead) {
mixed
aa
=
EXP
(
-0.125f
*
wdt
*
chainData[bead
+
1].y
)
;
chainData[bead].y
=
aa
*
(
chainData[bead].y
*
aa
+
0.25f
*
wdt
*
chainForces[bead]
)
;
}
//
update
particle
velocities
mixed
aa
=
EXP
(
-0.5f
*
wdt
*
chainData[0].y
)
;
scale
*=
aa
;
//
update
the
thermostat
positions
for
(
int
bead
=
0
; bead < chainLength; ++bead) {
chainData[bead].x
+=
0.5f
*
chainData[bead].y
*
wdt
;
}
//
update
the
forces
chainForces[0]
=
(
scale
*
scale
*
KE2
-
numDOFs
*
kT
)
/
chainMasses[0]
;
//
update
thermostat
velocities
for
(
int
bead
=
0
; bead < chainLength - 1; ++bead) {
mixed
aa
=
EXP
(
-0.125f
*
wdt
*
chainData[bead
+
1].y
)
;
chainData[bead].y
=
aa
*
(
aa
*
chainData[bead].y
+
0.25f
*
wdt
*
chainForces[bead]
)
;
chainForces[bead
+
1]
=
(
chainMasses[bead]
*
chainData[bead].y
*
chainData[bead].y
-
kT
)
/
chainMasses[bead
+
1]
;
}
chainData[chainLength-1].y
+=
0.25f
*
wdt
*
chainForces[chainLength-1]
;
END_YS_LOOP
}
//
MTS
loop
if
(
chainType
==
0
)
{
scaleFactor[0].x
=
scale
;
}
else
{
scaleFactor[0].y
=
scale
;
}
}
/**
 * Compute total (potential + kinetic) energy of the Nose-Hoover beads
 *
 * For every bead, 0.5*mass*velocity^2 and prefac*kT*position are added to
 * heatBathEnergy[0], where mass = prefac*kT/frequency^2 and prefac is numDOFs
 * for the first bead and 1 for the others.
 */
__kernel void computeHeatBathEnergy(__global mixed* restrict heatBathEnergy, int chainLength, int numDOFs, mixed kT, float frequency,
        __global const mixed2* restrict chainData) {
    // Note that this is always incremented; make sure it's zeroed properly before the first call
    for (int bead = 0; bead < chainLength; ++bead) {
        const mixed2 state = chainData[bead];
        mixed prefac = (bead == 0) ? numDOFs : 1;
        mixed mass = prefac * kT / (frequency * frequency);

        // The kinetic energy of this bead (state.y is its velocity)
        heatBathEnergy[0] += 0.5f * mass * state.y * state.y;

        // The potential energy of this bead (state.x is its position)
        heatBathEnergy[0] += prefac * kT * state.x;
    }
}
/**
 * Sum 0.5*m*|v|^2 over the atoms listed in `atoms`.
 *
 * The mass is 1/velm.w, or 0 when velm.w is 0.  Each work-item stores its partial
 * sum in the x component of energyBuffer[get_global_id(0)]; the y component is
 * set to 0.
 */
__kernel void computeAtomsKineticEnergy(__global mixed2 * restrict energyBuffer, int numAtoms, __global const mixed4* restrict velm, __global const int *restrict atoms) {
    mixed2 energy = (mixed2) (0, 0);
    for (int index = get_global_id(0); index < numAtoms; index += get_global_size(0)) {
        const mixed4 v = velm[atoms[index]];
        const mixed mass = v.w == 0 ? 0 : 1 / v.w;
        energy.x += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
    }
    energyBuffer[get_global_id(0)] = energy;
}
/**
 * Accumulate the kinetic energy of the atom pairs listed in `pairs`.
 *
 * For each pair the center of mass velocity cv and the relative velocity
 * rv = v2 - v1 are formed.  0.5*(m1+m2)*|cv|^2 goes into the x component and
 * 0.5*(m1*m2/(m1+m2))*|rv|^2 into the y component of this work-item's slot of
 * energyBuffer.  Masses are 1/velm.w, or 0 when velm.w is 0.
 */
__kernel void computePairsKineticEnergy(__global mixed2 * restrict energyBuffer, int numPairs, __global const mixed4* restrict velm, __global const int2 *restrict pairs) {
    mixed2 energy = (mixed2) (0, 0);
    for (int index = get_global_id(0); index < numPairs; index += get_global_size(0)) {
        const int2 pair = pairs[index];
        const mixed4 v1 = velm[pair.x];
        const mixed4 v2 = velm[pair.y];
        const mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
        const mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;

        // Center of mass velocity.
        mixed4 cv;
        cv.xyz = (m1*v1.xyz + m2*v2.xyz) / (m1 + m2);

        // Relative velocity.
        mixed4 rv;
        rv.xyz = v2.xyz - v1.xyz;

        energy.x += 0.5f * (m1 + m2) * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z);
        energy.y += 0.5f * (m1 * m2 / (m1 + m2)) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z);
    }
    // The atoms version of this has been called already, so accumulate instead of assigning here
    energyBuffer[get_global_id(0)] += energy;
}
/**
 * Multiply the x, y and z velocity components of every atom listed in `atoms`
 * by scaleFactor[0].x.  The w component of velm is not modified.
 */
__kernel void scaleAtomsVelocities(__global mixed2* restrict scaleFactor, int numAtoms, __global mixed4* restrict velm, __global const int *restrict atoms) {
    const mixed scale = scaleFactor[0].x;
    for (int index = get_global_id(0); index < numAtoms; index += get_global_size(0)) {
        const int atom = atoms[index];
        velm[atom].xyz *= scale;
    }
}
/**
 * Rescale the velocities of the atom pairs listed in `pairs`.
 *
 * Each pair's velocities are split into the center of mass velocity cv and the
 * relative velocity rv = v2 - v1.  cv is scaled by scaleFactor[0].x and rv by
 * scaleFactor[0].y, and the two atoms are rebuilt as
 *     v1 = sx*cv - sy*rv*m2/(m1+m2)
 *     v2 = sx*cv + sy*rv*m1/(m1+m2)
 * Masses are 1/velm.w, or 0 when velm.w is 0.  Only x, y and z are written.
 */
__kernel void scalePairsVelocities(__global mixed2 * restrict scaleFactor, int numPairs, __global mixed4* restrict velm, __global const int2 *restrict pairs) {
    for (int index = get_global_id(0); index < numPairs; index += get_global_size(0)) {
        const int atom1 = pairs[index].x;
        const int atom2 = pairs[index].y;

        // Read both atoms before anything is written back.
        const mixed4 v1 = velm[atom1];
        const mixed4 v2 = velm[atom2];
        const mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
        const mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
        mixed4 cv;
        cv.xyz = (m1*v1.xyz + m2*v2.xyz) / (m1 + m2);
        mixed4 rv;
        rv.xyz = v2.xyz - v1.xyz;

        const mixed comScale = scaleFactor[0].x;
        const mixed relScale = scaleFactor[0].y;
        velm[atom1].x = comScale * cv.x - relScale * rv.x * m2 / (m1 + m2);
        velm[atom1].y = comScale * cv.y - relScale * rv.y * m2 / (m1 + m2);
        velm[atom1].z = comScale * cv.z - relScale * rv.z * m2 / (m1 + m2);
        velm[atom2].x = comScale * cv.x + relScale * rv.x * m1 / (m1 + m2);
        velm[atom2].y = comScale * cv.y + relScale * rv.y * m1 / (m1 + m2);
        velm[atom2].z = comScale * cv.z + relScale * rv.z * m1 / (m1 + m2);
    }
}
/**
 * Sum the energy buffer containing a pair of energies stored as mixed2.
 * This is copied from utilities.cu with small modifications.
 *
 * Each work-item first adds up the entries thread, thread+local_size, ... of
 * energyBuffer; the partial sums are then combined pairwise in tempBuffer and
 * the total is written to *result by work-item 0.  tempBuffer needs at least
 * one element per work-item that stores a partial sum.
 */
__kernel void reduceEnergyPair(__global const mixed2* restrict energyBuffer, __global mixed2* restrict result, int bufferSize, int workGroupSize, __local mixed2* restrict tempBuffer) {
    const unsigned int thread = get_local_id(0);

    // Per work-item partial sum.
    mixed2 sum = (mixed2) (0, 0);
    for (unsigned int index = thread; index < bufferSize; index += get_local_size(0))
        sum += energyBuffer[index];
    tempBuffer[thread] = sum;

    // Pairwise reduction; the barrier at the top of each pass makes the previous
    // pass (or the initial store) visible before it is read.
    for (int step = 1; step < workGroupSize; step *= 2) {
        barrier(CLK_LOCAL_MEM_FENCE);
        if (thread%(step*2) == 0 && thread+step < workGroupSize)
            tempBuffer[thread] += tempBuffer[thread+step];
    }
    if (thread == 0)
        *result = tempBuffer[0];
}
platforms/opencl/src/kernels/periodicTorsionForce.cl
deleted
100644 → 0
View file @
8dd60914
// Periodic torsion term (code fragment; theta, index, energy and PARAMS are
// presumably provided by the kernel this is inserted into - TODO confirm).
// torsionParams.x scales the energy, .y is subtracted as a phase and .z
// multiplies theta.
float4 torsionParams = PARAMS[index];
real deltaAngle = torsionParams.z*theta-torsionParams.y;

// Derivative of the energy with respect to theta.
real sinDeltaAngle = sin(deltaAngle);
real dEdAngle = -torsionParams.x*torsionParams.z*sinDeltaAngle;

// E = k*(1 + cos(n*theta - phase))
energy += torsionParams.x*(1.0f+cos(deltaAngle));
platforms/opencl/src/kernels/random.cl
deleted
100644 → 0
View file @
8dd60914
/**
 * Generate random numbers
 *
 * Every work-item advances its own generator state (loaded from and stored back
 * to seed[get_global_id(0)]) and fills random[] with float4 values.  Each float4
 * is built from two pairs of uniform numbers u, v as
 * sqrt(-2*log(u))*cos(2*pi*v) and sqrt(-2*log(u))*sin(2*pi*v).
 * The uniform u is clamped to be at least 1/0xffffffff so the logarithm stays
 * finite.  The order of the state updates below must not be changed: every
 * uniform number depends on exactly how far x, y and (z, w, carry) have been
 * advanced at that point.
 */
__kernel void generateRandomNumbers(int numValues, __global float4* restrict random, __global uint4* restrict seed) {
    const int gid = get_global_id(0);
    uint4 state = seed[gid];
    unsigned int carry = 0;
    for (int index = gid; index < numValues; index += get_global_size(0)) {
        float4 value;

        // Generate first two values.

        state.x = state.x * 69069 + 1;
        state.y ^= state.y << 13;
        state.y ^= state.y >> 17;
        state.y ^= state.y << 5;
        unsigned int k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
        unsigned int m = state.w + state.w + state.z + carry;
        state.z = state.w;
        state.w = m;
        carry = k >> 30;
        float radius1 = (float)max(state.x + state.y + state.w, 0x00000001u) / (float)0xffffffff;
        radius1 = SQRT(-2.0f * LOG(radius1));
        state.x = state.x * 69069 + 1;
        state.y ^= state.y << 13;
        state.y ^= state.y >> 17;
        state.y ^= state.y << 5;
        k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
        m = state.w + state.w + state.z + carry;
        state.z = state.w;
        state.w = m;
        carry = k >> 30;
        float uniform2 = (float)(state.x + state.y + state.w) / (float)0xffffffff;
        value.x = radius1 * cos(2.0f * 3.14159265f * uniform2);
        value.y = radius1 * sin(2.0f * 3.14159265f * uniform2);

        // Generate next two values.

        state.x = state.x * 69069 + 1;
        state.y ^= state.y << 13;
        state.y ^= state.y >> 17;
        state.y ^= state.y << 5;
        k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
        m = state.w + state.w + state.z + carry;
        state.z = state.w;
        state.w = m;
        carry = k >> 30;
        float radius3 = (float)max(state.x + state.y + state.w, 0x00000001u) / (float)0xffffffff;
        radius3 = SQRT(-2.0f * LOG(radius3));
        state.x = state.x * 69069 + 1;
        state.y ^= state.y << 13;
        state.y ^= state.y >> 17;
        state.y ^= state.y << 5;
        k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
        m = state.w + state.w + state.z + carry;
        state.z = state.w;
        state.w = m;
        carry = k >> 30;
        float uniform4 = (float)(state.x + state.y + state.w) / (float)0xffffffff;
        value.z = radius3 * cos(2.0f * 3.14159265f * uniform4);
        value.w = radius3 * sin(2.0f * 3.14159265f * uniform4);

        // Record the values.

        random[index] = value;
    }
    seed[gid] = state;
}
platforms/opencl/src/kernels/rbTorsionForce.cl
deleted
100644 → 0
View file @
8dd60914
// Ryckaert-Bellemans style torsion term (code fragment; theta, cosangle, index,
// energy and PARAMS are presumably provided by the kernel this is inserted
// into - TODO confirm).  The energy is the polynomial
//     s0 + s1*c + s2*c^2 + s3*c^3 + s4*c^4 + s5*c^5
// in c = -cosangle, evaluated after theta has been shifted by PI.
float8 torsionParams = PARAMS[index];
if (theta < 0.0f)
    theta += PI;
else
    theta -= PI;
cosangle = -cosangle;

// Constant and linear terms.
real cosFactor = cosangle;
real rbEnergy = torsionParams.s0;
real dEdAngle = -torsionParams.s1;
rbEnergy += torsionParams.s1*cosFactor;

// Quadratic term: derivative uses c, energy uses c^2.
dEdAngle -= 2.0f*torsionParams.s2*cosFactor;
cosFactor *= cosangle;
rbEnergy += torsionParams.s2*cosFactor;

// Cubic term.
dEdAngle -= 3.0f*torsionParams.s3*cosFactor;
cosFactor *= cosangle;
rbEnergy += torsionParams.s3*cosFactor;

// Quartic term.
dEdAngle -= 4.0f*torsionParams.s4*cosFactor;
cosFactor *= cosangle;
rbEnergy += torsionParams.s4*cosFactor;

// Quintic term.
dEdAngle -= 5.0f*torsionParams.s5*cosFactor;
rbEnergy += torsionParams.s5*cosFactor*cosangle;

energy += rbEnergy;

// Chain rule: the polynomial derivative is with respect to the cosine.
dEdAngle *= sin(theta);
Prev
1
…
7
8
9
10
11
12
13
14
15
…
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment