Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8d6a2a01
Commit
8d6a2a01
authored
Oct 16, 2012
by
Peter Eastman
Browse files
Beginnings of mixed/double precision support in OpenCL
parent
a3d5f834
Changes
26
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
158 additions
and
108 deletions
+158
-108
platforms/opencl/src/kernels/shakeHydrogens.cl
platforms/opencl/src/kernels/shakeHydrogens.cl
+44
-34
platforms/opencl/src/kernels/verlet.cl
platforms/opencl/src/kernels/verlet.cl
+39
-24
platforms/opencl/src/kernels/virtualSites.cl
platforms/opencl/src/kernels/virtualSites.cl
+72
-47
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+1
-1
platforms/opencl/tests/TestOpenCLRandom.cpp
platforms/opencl/tests/TestOpenCLRandom.cpp
+1
-1
platforms/opencl/tests/TestOpenCLSort.cpp
platforms/opencl/tests/TestOpenCLSort.cpp
+1
-1
No files found.
platforms/opencl/src/kernels/shakeHydrogens.cl
View file @
8d6a2a01
/**
 * Load the position of a particle.
 *
 * Returns posq[index] widened to mixed4.  When USE_MIXED_PRECISION is defined,
 * the x, y and z components of posqCorrection[index] are added (after being
 * cast to `mixed`) to the matching components of posq[index]; the w component
 * is always taken from posq[index] alone.  Without USE_MIXED_PRECISION,
 * posqCorrection is not read at all.
 */
mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
#ifndef USE_MIXED_PRECISION
    return posq[index];
#else
    const real4 base = posq[index];
    const real4 correction = posqCorrection[index];
    mixed4 combined;
    combined.x = base.x + (mixed) correction.x;
    combined.y = base.y + (mixed) correction.y;
    combined.z = base.z + (mixed) correction.z;
    combined.w = base.w; // w carries no correction term
    return combined;
#endif
}
/**
*
Enforce
constraints
on
SHAKE
clusters
*/
__kernel
void
applyShakeToHydrogens
(
int
numClusters,
float
tol,
__global
const
float
4*
restrict
oldPos,
__global
float
4*
restrict
posDelta,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float4*
restrict
clusterParams
)
{
__kernel
void
applyShakeToHydrogens
(
int
numClusters,
mixed
tol,
__global
const
real
4*
restrict
oldPos,
__global
const
real4*
restrict
posCorrection,
__global
mixed
4*
restrict
posDelta,
__global
const
int4*
restrict
clusterAtoms,
__global
const
float4*
restrict
clusterParams
)
{
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numClusters
)
{
//
Load
the
data
for
this
cluster.
int4
atoms
=
clusterAtoms[index]
;
float4
params
=
clusterParams[index]
;
float
4
pos
=
oldPos[
atoms.x
]
;
float
4
xpi
=
posDelta[atoms.x]
;
float
4
pos1
=
oldPos[
atoms.y
]
;
float
4
xpj1
=
posDelta[atoms.y]
;
float
4
pos2
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
float
4
xpj2
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
mixed
4
pos
=
loadPos
(
oldPos,
posCorrection,
atoms.x
)
;
mixed
4
xpi
=
posDelta[atoms.x]
;
mixed
4
pos1
=
loadPos
(
oldPos,
posCorrection,
atoms.y
)
;
mixed
4
xpj1
=
posDelta[atoms.y]
;
mixed
4
pos2
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
mixed
4
xpj2
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
float
invMassCentral
=
params.x
;
float
avgMass
=
params.y
;
float
d2
=
params.z
;
float
invMassPeripheral
=
params.w
;
if
(
atoms.z
!=
-1
)
{
pos2
=
oldPos[
atoms.z
]
;
pos2
=
loadPos
(
oldPos,
posCorrection,
atoms.z
)
;
xpj2
=
posDelta[atoms.z]
;
}
float
4
pos3
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
float
4
xpj3
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
mixed
4
pos3
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
mixed
4
xpj3
=
{0.0f,
0.0f,
0.0f,
0.0f}
;
if
(
atoms.w
!=
-1
)
{
pos3
=
oldPos[
atoms.w
]
;
pos3
=
loadPos
(
oldPos,
posCorrection,
atoms.w
)
;
xpj3
=
posDelta[atoms.w]
;
}
//
Precompute
quantities.
float
4
rij1
=
pos-pos1
;
float
4
rij2
=
pos-pos2
;
float
4
rij3
=
pos-pos3
;
float
rij1sq
=
rij1.x*rij1.x
+
rij1.y*rij1.y
+
rij1.z*rij1.z
;
float
rij2sq
=
rij2.x*rij2.x
+
rij2.y*rij2.y
+
rij2.z*rij2.z
;
float
rij3sq
=
rij3.x*rij3.x
+
rij3.y*rij3.y
+
rij3.z*rij3.z
;
float
ld1
=
d2-rij1sq
;
float
ld2
=
d2-rij2sq
;
float
ld3
=
d2-rij3sq
;
mixed
4
rij1
=
pos-pos1
;
mixed
4
rij2
=
pos-pos2
;
mixed
4
rij3
=
pos-pos3
;
mixed
rij1sq
=
rij1.x*rij1.x
+
rij1.y*rij1.y
+
rij1.z*rij1.z
;
mixed
rij2sq
=
rij2.x*rij2.x
+
rij2.y*rij2.y
+
rij2.z*rij2.z
;
mixed
rij3sq
=
rij3.x*rij3.x
+
rij3.y*rij3.y
+
rij3.z*rij3.z
;
mixed
ld1
=
d2-rij1sq
;
mixed
ld2
=
d2-rij2sq
;
mixed
ld3
=
d2-rij3sq
;
//
Iterate
until
convergence.
...
...
@@ -49,10 +59,10 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
while
(
iteration
<
15
&&
!converged
)
{
converged
=
true
;
#
ifdef
CONSTRAIN_VELOCITIES
float
4
rpij
=
xpi-xpj1
;
float
rrpr
=
rpij.x*rij1.x
+
rpij.y*rij1.y
+
rpij.z*rij1.z
;
float
delta
=
-2.0f*avgMass*rrpr/rij1sq
;
float
4
dr
=
rij1*delta
;
mixed
4
rpij
=
xpi-xpj1
;
mixed
rrpr
=
rpij.x*rij1.x
+
rpij.y*rij1.y
+
rpij.z*rij1.z
;
mixed
delta
=
-2.0f*avgMass*rrpr/rij1sq
;
mixed
4
dr
=
rij1*delta
;
xpi.xyz
+=
dr.xyz*invMassCentral
;
xpj1.xyz
-=
dr.xyz*invMassPeripheral
;
if
(
fabs
(
delta
)
>
tol
)
...
...
@@ -78,13 +88,13 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
converged
=
false
;
}
#
else
float
4
rpij
=
xpi-xpj1
;
float
rpsqij
=
rpij.x*rpij.x
+
rpij.y*rpij.y
+
rpij.z*rpij.z
;
float
rrpr
=
rij1.x*rpij.x
+
rij1.y*rpij.y
+
rij1.z*rpij.z
;
float
diff
=
fabs
(
ld1-2.0f*rrpr-rpsqij
)
/
(
d2*tol
)
;
mixed
4
rpij
=
xpi-xpj1
;
mixed
rpsqij
=
rpij.x*rpij.x
+
rpij.y*rpij.y
+
rpij.z*rpij.z
;
mixed
rrpr
=
rij1.x*rpij.x
+
rij1.y*rpij.y
+
rij1.z*rpij.z
;
mixed
diff
=
fabs
(
ld1-2.0f*rrpr-rpsqij
)
/
(
d2*tol
)
;
if
(
diff
>=
1.0f
)
{
float
acor
=
(
ld1-2.0f*rrpr-rpsqij
)
*avgMass
/
(
rrpr+rij1sq
)
;
float
4
dr
=
rij1*acor
;
mixed
acor
=
(
ld1-2.0f*rrpr-rpsqij
)
*avgMass
/
(
rrpr+rij1sq
)
;
mixed
4
dr
=
rij1*acor
;
xpi.xyz
+=
dr.xyz*invMassCentral
;
xpj1.xyz
-=
dr.xyz*invMassPeripheral
;
converged
=
false
;
...
...
@@ -95,8 +105,8 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
rrpr
=
rij2.x*rpij.x
+
rij2.y*rpij.y
+
rij2.z*rpij.z
;
diff
=
fabs
(
ld2-2.0f*rrpr-rpsqij
)
/
(
d2*tol
)
;
if
(
diff
>=
1.0f
)
{
float
acor
=
(
ld2
-
2.0f*rrpr
-
rpsqij
)
*avgMass
/
(
rrpr
+
rij2sq
)
;
float
4
dr
=
rij2*acor
;
mixed
acor
=
(
ld2
-
2.0f*rrpr
-
rpsqij
)
*avgMass
/
(
rrpr
+
rij2sq
)
;
mixed
4
dr
=
rij2*acor
;
xpi.xyz
+=
dr.xyz*invMassCentral
;
xpj2.xyz
-=
dr.xyz*invMassPeripheral
;
converged
=
false
;
...
...
@@ -108,8 +118,8 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
rrpr
=
rij3.x*rpij.x
+
rij3.y*rpij.y
+
rij3.z*rpij.z
;
diff
=
fabs
(
ld3
-
2.0f*rrpr
-
rpsqij
)
/
(
d2*tol
)
;
if
(
diff
>=
1.0f
)
{
float
acor
=
(
ld3-2.0f*rrpr-rpsqij
)
*avgMass
/
(
rrpr+rij3sq
)
;
float
4
dr
=
rij3*acor
;
mixed
acor
=
(
ld3-2.0f*rrpr-rpsqij
)
*avgMass
/
(
rrpr+rij3sq
)
;
mixed
4
dr
=
rij3*acor
;
xpi.xyz
+=
dr.xyz*invMassCentral
;
xpj3.xyz
-=
dr.xyz*invMassPeripheral
;
converged
=
false
;
...
...
platforms/opencl/src/kernels/verlet.cl
View file @
8d6a2a01
#
ifdef
SUPPORTS_DOUBLE_PRECISION
#
pragma
OPENCL
EXTENSION
cl_khr_fp64
:
enable
#
endif
/**
*
Perform
the
first
step
of
verlet
integration.
*/
__kernel
void
integrateVerletPart1
(
int
numAtoms,
__global
const
float
2*
restrict
dt,
__global
const
float
4*
restrict
posq,
__global
float
4*
restrict
velm,
__global
const
float
4*
restrict
force,
__global
float
4*
restrict
posDelta
)
{
float
2
stepSize
=
dt[0]
;
float
dtPos
=
stepSize.y
;
float
dtVel
=
0.5f*
(
stepSize.x+stepSize.y
)
;
__kernel
void
integrateVerletPart1
(
int
numAtoms,
__global
const
mixed
2*
restrict
dt,
__global
const
real4*
restrict
posq,
__global
const
real
4*
restrict
posq
Correction
,
__global
mixed
4*
restrict
velm,
__global
const
real
4*
restrict
force,
__global
mixed
4*
restrict
posDelta
)
{
mixed
2
stepSize
=
dt[0]
;
mixed
dtPos
=
stepSize.y
;
mixed
dtVel
=
0.5f*
(
stepSize.x+stepSize.y
)
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
float
4
velocity
=
velm[index]
;
mixed
4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
{
float4
pos
=
posq[index]
;
velocity.xyz
+=
force[index].xyz*dtVel*velocity.w
;
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
velocity.x
+=
force[index].x*dtVel*velocity.w
;
velocity.y
+=
force[index].y*dtVel*velocity.w
;
velocity.z
+=
force[index].z*dtVel*velocity.w
;
pos.xyz
=
velocity.xyz*dtPos
;
posDelta[index]
=
pos
;
velm[index]
=
velocity
;
...
...
@@ -28,8 +32,8 @@ __kernel void integrateVerletPart1(int numAtoms, __global const float2* restrict
*
Perform
the
second
step
of
verlet
integration.
*/
__kernel
void
integrateVerletPart2
(
int
numAtoms,
__global
float
2*
restrict
dt,
__global
float
4*
restrict
posq,
__global
float
4*
restrict
velm,
__global
const
float
4*
restrict
posDelta
)
{
float
2
stepSize
=
dt[0]
;
__kernel
void
integrateVerletPart2
(
int
numAtoms,
__global
mixed
2*
restrict
dt,
__global
real
4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed
4*
restrict
velm,
__global
const
mixed
4*
restrict
posDelta
)
{
mixed
2
stepSize
=
dt[0]
;
#
ifdef
SUPPORTS_DOUBLE_PRECISION
double
oneOverDt
=
1.0/stepSize.y
;
#
else
...
...
@@ -40,17 +44,28 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, _
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
float
4
velocity
=
velm[index]
;
mixed
4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
{
float4
pos
=
posq[index]
;
float4
delta
=
posDelta[index]
;
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
mixed4
delta
=
posDelta[index]
;
pos.xyz
+=
delta.xyz
;
#
ifdef
SUPPORTS_DOUBLE_PRECISION
velocity.xyz
=
convert_
float
4
(
convert_double4
(
delta
)
*oneOverDt
)
.
xyz
;
velocity.xyz
=
convert_
mixed
4
(
convert_double4
(
delta
)
*oneOverDt
)
.
xyz
;
#
else
velocity.xyz
=
delta.xyz*oneOverDt
;
#
endif
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
convert_real4
(
pos
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
#
endif
velm[index]
=
velocity
;
}
index
+=
get_global_size
(
0
)
;
...
...
@@ -61,14 +76,14 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, _
*
Select
the
step
size
to
use
for
the
next
step.
*/
__kernel
void
selectVerletStepSize
(
int
numAtoms,
float
maxStepSize,
float
errorTol,
__global
float
2*
restrict
dt,
__global
const
float
4*
restrict
velm,
__global
const
float
4*
restrict
force,
__local
float
*
restrict
error
)
{
__kernel
void
selectVerletStepSize
(
int
numAtoms,
mixed
maxStepSize,
mixed
errorTol,
__global
mixed
2*
restrict
dt,
__global
const
mixed
4*
restrict
velm,
__global
const
real
4*
restrict
force,
__local
mixed
*
restrict
error
)
{
//
Calculate
the
error.
float
err
=
0
.0f
;
mixed
err
=
0
;
int
index
=
get_local_id
(
0
)
;
while
(
index
<
numAtoms
)
{
float
4
f
=
force[index]
;
float
invMass
=
velm[index].w
;
real
4
f
=
force[index]
;
mixed
invMass
=
velm[index].w
;
err
+=
(
f.x*f.x
+
f.y*f.y
+
f.z*f.z
)
*invMass
;
index
+=
get_global_size
(
0
)
;
}
...
...
@@ -83,9 +98,9 @@ __kernel void selectVerletStepSize(int numAtoms, float maxStepSize, float errorT
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
get_local_id
(
0
)
==
0
)
{
float
totalError
=
sqrt
(
error[0]/
(
numAtoms*3
))
;
float
newStepSize
=
sqrt
(
errorTol/totalError
)
;
float
oldStepSize
=
dt[0].y
;
mixed
totalError
=
sqrt
(
error[0]/
(
numAtoms*3
))
;
mixed
newStepSize
=
sqrt
(
errorTol/totalError
)
;
mixed
oldStepSize
=
dt[0].y
;
if
(
oldStepSize
>
0.0f
)
newStepSize
=
min
(
newStepSize,
oldStepSize*2.0f
)
; // For safety, limit how quickly dt can increase.
if
(
newStepSize
>
oldStepSize
&&
newStepSize
<
1.1f*oldStepSize
)
...
...
platforms/opencl/src/kernels/virtualSites.cl
View file @
8d6a2a01
/**
 * Load the position of a particle.
 *
 * Returns posq[index] widened to mixed4.  When USE_MIXED_PRECISION is defined,
 * the x, y and z components of posqCorrection[index] are added (after being
 * cast to `mixed`) to the matching components of posq[index]; the w component
 * is always taken from posq[index] alone.  Without USE_MIXED_PRECISION,
 * posqCorrection is not read at all.
 */
mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
#ifndef USE_MIXED_PRECISION
    return posq[index];
#else
    const real4 base = posq[index];
    const real4 correction = posqCorrection[index];
    mixed4 combined;
    combined.x = base.x + (mixed) correction.x;
    combined.y = base.y + (mixed) correction.y;
    combined.z = base.z + (mixed) correction.z;
    combined.w = base.w; // w carries no correction term
    return combined;
#endif
}
/**
 * Store the position of a particle.
 *
 * Without USE_MIXED_PRECISION, pos is written straight to posq[index] and
 * posqCorrection is left untouched.  With USE_MIXED_PRECISION, every component
 * of pos is cast to `real` and written to posq[index], and the part of x, y
 * and z lost by that cast (pos minus the cast value) is written to
 * posqCorrection[index], whose w component is set to 0.
 */
void storePos(__global real4* restrict posq, __global real4* restrict posqCorrection, int index, mixed4 pos) {
#ifndef USE_MIXED_PRECISION
    posq[index] = pos;
#else
    const real4 rounded = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
    posq[index] = rounded;
    // Residual between the full-precision value and what was just stored.
    posqCorrection[index] = (real4) (pos.x-rounded.x, pos.y-rounded.y, pos.z-rounded.z, 0);
#endif
}
/**
 * Compute the positions of virtual sites
 *
 * For each site record, atoms.x is the index of the particle whose position is
 * rewritten and atoms.y/z/w are the particles it is computed from.  Positions
 * are read through loadPos() and written through storePos(), so posq and
 * posqCorrection are always updated together.  Only the xyz components of the
 * site are recomputed; its w component is stored back as loaded.
 *
 * NUM_2_AVERAGE, NUM_3_AVERAGE and NUM_OUT_OF_PLANE are not defined in this
 * block -- presumably supplied as compile-time defines by the host; confirm.
 */
__kernel void computeVirtualSites(__global real4* restrict posq, __global real4* restrict posqCorrection,
        __global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
        __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
        __global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights) {

    // Two particle average sites.

    for (int index = get_global_id(0); index < NUM_2_AVERAGE; index += get_global_size(0)) {
        int4 atoms = avg2Atoms[index];
        real2 weights = avg2Weights[index];
        mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
        pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y;
        storePos(posq, posqCorrection, atoms.x, pos);
    }

    // Three particle average sites.

    for (int index = get_global_id(0); index < NUM_3_AVERAGE; index += get_global_size(0)) {
        int4 atoms = avg3Atoms[index];
        real4 weights = avg3Weights[index];
        mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
        pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y + pos3.xyz*weights.z;
        storePos(posq, posqCorrection, atoms.x, pos);
    }

    // Out of plane sites.

    for (int index = get_global_id(0); index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
        int4 atoms = outOfPlaneAtoms[index];
        real4 weights = outOfPlaneWeights[index];
        mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
        mixed4 v12 = pos2-pos1;
        mixed4 v13 = pos3-pos1;
        // Site = pos1 + in-plane combination of v12 and v13 + component along their cross product.
        pos.xyz = pos1.xyz + v12.xyz*weights.x + v13.xyz*weights.y + cross(v12, v13).xyz*weights.z;
        storePos(posq, posqCorrection, atoms.x, pos);
    }
}
/**
*
Distribute
forces
from
virtual
sites
to
the
atoms
they
are
based
on.
*/
__kernel
void
distributeForces
(
__global
const
float
4*
restrict
posq,
__global
float
4*
restrict
force,
__global
const
int4*
restrict
avg2Atoms,
__global
const
float
2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
float
4*
restrict
avg3Weights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
float
4*
restrict
outOfPlaneWeights
)
{
__kernel
void
distributeForces
(
__global
const
real4*
restrict
posq,
__global
real
4*
restrict
posq
Correction
,
__global
real
4*
restrict
force,
__global
const
int4*
restrict
avg2Atoms,
__global
const
real
2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
real
4*
restrict
avg3Weights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
real
4*
restrict
outOfPlaneWeights
)
{
//
Two
particle
average
sites.
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_2_AVERAGE; index += get_global_size(0)) {
int4
atoms
=
avg2Atoms[index]
;
float
2
weights
=
avg2Weights[index]
;
float
4
f
=
force[atoms.x]
;
float
4
f1
=
force[atoms.y]
;
float
4
f2
=
force[atoms.z]
;
real
2
weights
=
avg2Weights[index]
;
real
4
f
=
force[atoms.x]
;
real
4
f1
=
force[atoms.y]
;
real
4
f2
=
force[atoms.z]
;
f1.xyz
+=
f.xyz*weights.x
;
f2.xyz
+=
f.xyz*weights.y
;
force[atoms.y]
=
f1
;
...
...
@@ -72,11 +97,11 @@ __kernel void distributeForces(__global const float4* restrict posq, __global fl
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_3_AVERAGE; index += get_global_size(0)) {
int4
atoms
=
avg3Atoms[index]
;
float
4
weights
=
avg3Weights[index]
;
float
4
f
=
force[atoms.x]
;
float
4
f1
=
force[atoms.y]
;
float
4
f2
=
force[atoms.z]
;
float
4
f3
=
force[atoms.w]
;
real
4
weights
=
avg3Weights[index]
;
real
4
f
=
force[atoms.x]
;
real
4
f1
=
force[atoms.y]
;
real
4
f2
=
force[atoms.z]
;
real
4
f3
=
force[atoms.w]
;
f1.xyz
+=
f.xyz*weights.x
;
f2.xyz
+=
f.xyz*weights.y
;
f3.xyz
+=
f.xyz*weights.z
;
...
...
@@ -89,20 +114,20 @@ __kernel void distributeForces(__global const float4* restrict posq, __global fl
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
int4
atoms
=
outOfPlaneAtoms[index]
;
float
4
weights
=
outOfPlaneWeights[index]
;
float
4
pos1
=
posq[
atoms.y
]
;
float
4
pos2
=
posq[
atoms.z
]
;
float
4
pos3
=
posq[
atoms.w
]
;
float
4
v12
=
pos2-pos1
;
float
4
v13
=
pos3-pos1
;
float
4
f
=
force[atoms.x]
;
float
4
f1
=
force[atoms.y]
;
float
4
f2
=
force[atoms.z]
;
float
4
f3
=
force[atoms.w]
;
float
4
fp2
=
(
float
4
)
(
weights.x*f.x
-
weights.z*v13.z*f.y
+
weights.z*v13.y*f.z,
real
4
weights
=
outOfPlaneWeights[index]
;
mixed
4
pos1
=
loadPos
(
posq,
posqCorrection,
atoms.y
)
;
mixed
4
pos2
=
loadPos
(
posq,
posqCorrection,
atoms.z
)
;
mixed
4
pos3
=
loadPos
(
posq,
posqCorrection,
atoms.w
)
;
mixed
4
v12
=
pos2-pos1
;
mixed
4
v13
=
pos3-pos1
;
real
4
f
=
force[atoms.x]
;
real
4
f1
=
force[atoms.y]
;
real
4
f2
=
force[atoms.z]
;
real
4
f3
=
force[atoms.w]
;
real
4
fp2
=
(
real
4
)
(
weights.x*f.x
-
weights.z*v13.z*f.y
+
weights.z*v13.y*f.z,
weights.z*v13.z*f.x
+
weights.x*f.y
-
weights.z*v13.x*f.z,
-weights.z*v13.y*f.x
+
weights.z*v13.x*f.y
+
weights.x*f.z,
0.0f
)
;
float
4
fp3
=
(
float
4
)
(
weights.y*f.x
+
weights.z*v12.z*f.y
-
weights.z*v12.y*f.z,
real
4
fp3
=
(
real
4
)
(
weights.y*f.x
+
weights.z*v12.z*f.y
-
weights.z*v12.y*f.z,
-weights.z*v12.z*f.x
+
weights.y*f.y
+
weights.z*v12.x*f.z,
weights.z*v12.y*f.x
-
weights.z*v12.x*f.y
+
weights.y*f.z,
0.0f
)
;
f1.xyz
+=
f.xyz-fp2.xyz-fp3.xyz
;
...
...
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
8d6a2a01
...
...
@@ -51,7 +51,7 @@ using namespace std;
void
testTransform
()
{
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
"single"
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/opencl/tests/TestOpenCLRandom.cpp
View file @
8d6a2a01
...
...
@@ -48,7 +48,7 @@ void testGaussian() {
System
system
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
"single"
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/opencl/tests/TestOpenCLSort.cpp
View file @
8d6a2a01
...
...
@@ -62,7 +62,7 @@ void verifySorting(vector<float> array) {
System
system
;
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
"single"
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
OpenCLArray
data
(
context
,
array
.
size
(),
sizeof
(
float
),
"sortData"
);
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment