Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bcf9f9b4
Commit
bcf9f9b4
authored
Apr 25, 2013
by
Peter Eastman
Browse files
Fixed thread synchronization problem in RPMD kernels
parent
95696f7a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
26 deletions
+20
-26
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
+10
-13
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
+10
-13
No files found.
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
View file @
bcf9f9b4
...
...
@@ -38,8 +38,6 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
mixed4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed
invMass
=
particleVelm
.
w
;
if
(
invMass
==
0
)
continue
;
mixed
c3_0
=
c2_0
*
SQRT
(
nkT
*
invMass
);
// Forward FFT.
...
...
@@ -76,6 +74,7 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
// Inverse FFT.
FFT_V_BACKWARD
if
(
invMass
!=
0
)
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
}
...
...
@@ -102,11 +101,10 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
mixed4
particleVelm
=
velm
[
index
];
if
(
particleVelm
.
w
==
0
)
continue
;
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
if
(
particleVelm
.
w
!=
0
)
velm
[
index
]
=
particleVelm
;
}
...
...
@@ -122,8 +120,6 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
mixed4
particlePosq
=
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
if
(
particleVelm
.
w
==
0
)
continue
;
// Forward FFT.
...
...
@@ -159,9 +155,11 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
FFT_Q_BACKWARD
FFT_V_BACKWARD
if
(
particleVelm
.
w
!=
0
)
{
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
qreal
[
indexInBlock
].
x
,
SCALE
*
qreal
[
indexInBlock
].
y
,
SCALE
*
qreal
[
indexInBlock
].
z
,
particlePosq
.
w
);
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
}
}
}
/**
...
...
@@ -179,11 +177,10 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
mixed4
particleVelm
=
velm
[
index
];
if
(
particleVelm
.
w
==
0
)
continue
;
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
if
(
particleVelm
.
w
!=
0
)
velm
[
index
]
=
particleVelm
;
}
}
...
...
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
View file @
bcf9f9b4
...
...
@@ -38,8 +38,6 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
mixed4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed
invMass
=
particleVelm.w
;
if
(
invMass
==
0
)
continue
;
mixed
c3_0
=
c2_0*sqrt
(
nkT*invMass
)
;
// Forward FFT.
...
...
@@ -75,6 +73,7 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
// Inverse FFT.
FFT_V_BACKWARD
if
(
invMass
!=
0
)
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*vreal[indexInBlock].xyz
;
randomIndex
+=
get_global_size
(
0
)
;
}
...
...
@@ -99,9 +98,8 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
mixed4
particleVelm
=
velm[index]
;
if
(
particleVelm.w
==
0
)
continue
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
if
(
particleVelm.w
!=
0
)
velm[index]
=
particleVelm
;
}
...
...
@@ -117,8 +115,6 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
mixed4
particlePosq
=
posq[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
if
(
particleVelm.w
==
0
)
continue
;
// Forward FFT.
...
...
@@ -154,9 +150,11 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
FFT_Q_BACKWARD
FFT_V_BACKWARD
if
(
particleVelm.w
!=
0
)
{
posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*qreal[indexInBlock].xyz
;
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*vreal[indexInBlock].xyz
;
}
}
}
/**
...
...
@@ -172,9 +170,8 @@ __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mi
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
mixed4
particleVelm
=
velm[index]
;
if
(
particleVelm.w
==
0
)
continue
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
if
(
particleVelm.w
!=
0
)
velm[index]
=
particleVelm
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment