Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6d7f0273
Commit
6d7f0273
authored
Jul 16, 2013
by
peastman
Browse files
Improved integration accuracy on devices that don't support double precision
parent
09970632
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
24 additions
and
2 deletions
+24
-2
platforms/cuda/src/kernels/langevin.cu
platforms/cuda/src/kernels/langevin.cu
+11
-0
platforms/cuda/src/kernels/verlet.cu
platforms/cuda/src/kernels/verlet.cu
+9
-0
platforms/opencl/src/kernels/langevin.cl
platforms/opencl/src/kernels/langevin.cl
+2
-1
platforms/opencl/src/kernels/verlet.cl
platforms/opencl/src/kernels/verlet.cl
+2
-1
No files found.
platforms/cuda/src/kernels/langevin.cu
View file @
6d7f0273
...
...
@@ -32,7 +32,12 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
*/
extern
"C"
__global__
void
integrateLangevinPart2
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
mixed2
*
__restrict__
dt
)
{
#if __CUDA_ARCH__ >= 130
double
invStepSize
=
1.0
/
dt
[
0
].
y
;
#else
float
invStepSize
=
1.0
f
/
dt
[
0
].
y
;
float
correction
=
(
1.0
f
-
invStepSize
*
dt
[
0
].
y
)
/
dt
[
0
].
y
;
#endif
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
index
<
NUM_ATOMS
)
{
mixed4
vel
=
velm
[
index
];
...
...
@@ -48,9 +53,15 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
#if __CUDA_ARCH__ >= 130
vel
.
x
=
(
mixed
)
(
invStepSize
*
delta
.
x
);
vel
.
y
=
(
mixed
)
(
invStepSize
*
delta
.
y
);
vel
.
z
=
(
mixed
)
(
invStepSize
*
delta
.
z
);
#else
// Single-precision fallback: invStepSize is a rounded float reciprocal of
// dt[0].y, and correction = (1 - invStepSize*dt[0].y)/dt[0].y compensates for
// that rounding error. The correction must be applied to the SAME component
// of delta that is being scaled (previously y and z wrongly used delta.x).
vel.x = invStepSize*delta.x + correction*delta.x;
vel.y = invStepSize*delta.y + correction*delta.y;
vel.z = invStepSize*delta.z + correction*delta.z;
#endif
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
...
...
platforms/cuda/src/kernels/verlet.cu
View file @
6d7f0273
...
...
@@ -37,7 +37,12 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
extern
"C"
__global__
void
integrateVerletPart2
(
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
mixed2
stepSize
=
dt
[
0
];
#if __CUDA_ARCH__ >= 130
double
oneOverDt
=
1.0
/
stepSize
.
y
;
#else
float
oneOverDt
=
1.0
f
/
stepSize
.
y
;
float
correction
=
(
1.0
f
-
oneOverDt
*
stepSize
.
y
)
/
stepSize
.
y
;
#endif
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
...
...
@@ -55,7 +60,11 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
#if __CUDA_ARCH__ >= 130
velocity
=
make_mixed4
((
mixed
)
(
delta
.
x
*
oneOverDt
),
(
mixed
)
(
delta
.
y
*
oneOverDt
),
(
mixed
)
(
delta
.
z
*
oneOverDt
),
velocity
.
w
);
#else
velocity
=
make_mixed4
((
mixed
)
(
delta
.
x
*
oneOverDt
+
delta
.
x
*
correction
),
(
mixed
)
(
delta
.
y
*
oneOverDt
+
delta
.
y
*
correction
),
(
mixed
)
(
delta
.
z
*
oneOverDt
+
delta
.
z
*
correction
),
velocity
.
w
);
#endif
#ifdef USE_MIXED_PRECISION
posq
[
index
]
=
make_real4
((
real
)
pos
.
x
,
(
real
)
pos
.
y
,
(
real
)
pos
.
z
,
(
real
)
pos
.
w
);
posqCorrection
[
index
]
=
make_real4
(
pos
.
x
-
(
real
)
pos
.
x
,
pos
.
y
-
(
real
)
pos
.
y
,
pos
.
z
-
(
real
)
pos
.
z
,
0
);
...
...
platforms/opencl/src/kernels/langevin.cl
View file @
6d7f0273
...
...
@@ -36,6 +36,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea
double
invStepSize
=
1.0/dt[0].y
;
#
else
float
invStepSize
=
1.0f/dt[0].y
;
float
correction
=
(
1.0f-invStepSize*dt[0].y
)
/dt[0].y
;
#
endif
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
...
...
@@ -53,7 +54,7 @@ __kernel void integrateLangevinPart2(__global real4* restrict posq, __global rea
#
ifdef
SUPPORTS_DOUBLE_PRECISION
vel.xyz
=
convert_mixed4
(
invStepSize*convert_double4
(
delta
))
.
xyz
;
#
else
vel.xyz
=
invStepSize*delta.xyz
;
vel.xyz
=
invStepSize*delta.xyz
+
correction*delta.xyz
;
#
endif
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
convert_real4
(
pos
)
;
...
...
platforms/opencl/src/kernels/verlet.cl
View file @
6d7f0273
...
...
@@ -38,6 +38,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _
double
oneOverDt
=
1.0/stepSize.y
;
#
else
float
oneOverDt
=
1.0f/stepSize.y
;
float
correction
=
(
1.0f-oneOverDt*stepSize.y
)
/stepSize.y
;
#
endif
if
(
get_global_id
(
0
)
==
0
)
dt[0].x
=
stepSize.y
;
...
...
@@ -58,7 +59,7 @@ __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, _
#
ifdef
SUPPORTS_DOUBLE_PRECISION
velocity.xyz
=
convert_mixed4
(
convert_double4
(
delta
)
*oneOverDt
)
.
xyz
;
#
else
velocity.xyz
=
delta.xyz*oneOverDt
;
velocity.xyz
=
delta.xyz*oneOverDt
+
delta.xyz*correction
;
#
endif
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
convert_real4
(
pos
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment