Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bcf9f9b4
"openmmapi/vscode:/vscode.git/clone" did not exist on "cd93e5e03994ef183ea22ec0eb132f5bfc50fe7e"
Commit
bcf9f9b4
authored
Apr 25, 2013
by
Peter Eastman
Browse files
Fixed thread synchronization problem in RPMD kernels
parent
95696f7a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
26 deletions
+20
-26
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
+10
-13
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
+10
-13
No files found.
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
View file @
bcf9f9b4
...
@@ -38,8 +38,6 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
...
@@ -38,8 +38,6 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
mixed4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed
invMass
=
particleVelm
.
w
;
mixed
invMass
=
particleVelm
.
w
;
if
(
invMass
==
0
)
continue
;
mixed
c3_0
=
c2_0
*
SQRT
(
nkT
*
invMass
);
mixed
c3_0
=
c2_0
*
SQRT
(
nkT
*
invMass
);
// Forward FFT.
// Forward FFT.
...
@@ -76,7 +74,8 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
...
@@ -76,7 +74,8 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
// Inverse FFT.
// Inverse FFT.
FFT_V_BACKWARD
FFT_V_BACKWARD
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
if
(
invMass
!=
0
)
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
...
@@ -102,12 +101,11 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
...
@@ -102,12 +101,11 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
mixed4
particleVelm
=
velm
[
index
];
mixed4
particleVelm
=
velm
[
index
];
if
(
particleVelm
.
w
==
0
)
continue
;
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
velm
[
index
]
=
particleVelm
;
if
(
particleVelm
.
w
!=
0
)
velm
[
index
]
=
particleVelm
;
}
}
// Evolve the free ring polymer by transforming to the frequency domain.
// Evolve the free ring polymer by transforming to the frequency domain.
...
@@ -122,8 +120,6 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
...
@@ -122,8 +120,6 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
mixed4
particlePosq
=
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed4
particlePosq
=
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
if
(
particleVelm
.
w
==
0
)
continue
;
// Forward FFT.
// Forward FFT.
...
@@ -159,8 +155,10 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
...
@@ -159,8 +155,10 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
FFT_Q_BACKWARD
FFT_Q_BACKWARD
FFT_V_BACKWARD
FFT_V_BACKWARD
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
qreal
[
indexInBlock
].
x
,
SCALE
*
qreal
[
indexInBlock
].
y
,
SCALE
*
qreal
[
indexInBlock
].
z
,
particlePosq
.
w
);
if
(
particleVelm
.
w
!=
0
)
{
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
qreal
[
indexInBlock
].
x
,
SCALE
*
qreal
[
indexInBlock
].
y
,
SCALE
*
qreal
[
indexInBlock
].
z
,
particlePosq
.
w
);
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
}
}
}
}
}
...
@@ -179,12 +177,11 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix
...
@@ -179,12 +177,11 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
mixed4
particleVelm
=
velm
[
index
];
mixed4
particleVelm
=
velm
[
index
];
if
(
particleVelm
.
w
==
0
)
continue
;
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
velm
[
index
]
=
particleVelm
;
if
(
particleVelm
.
w
!=
0
)
velm
[
index
]
=
particleVelm
;
}
}
}
}
...
...
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
View file @
bcf9f9b4
...
@@ -38,8 +38,6 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
...
@@ -38,8 +38,6 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
mixed4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed
invMass
=
particleVelm.w
;
mixed
invMass
=
particleVelm.w
;
if
(
invMass
==
0
)
continue
;
mixed
c3_0
=
c2_0*sqrt
(
nkT*invMass
)
;
mixed
c3_0
=
c2_0*sqrt
(
nkT*invMass
)
;
//
Forward
FFT.
//
Forward
FFT.
...
@@ -75,7 +73,8 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
...
@@ -75,7 +73,8 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
//
Inverse
FFT.
//
Inverse
FFT.
FFT_V_BACKWARD
FFT_V_BACKWARD
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*vreal[indexInBlock].xyz
;
if
(
invMass
!=
0
)
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*vreal[indexInBlock].xyz
;
randomIndex
+=
get_global_size
(
0
)
;
randomIndex
+=
get_global_size
(
0
)
;
}
}
}
}
...
@@ -99,10 +98,9 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
...
@@ -99,10 +98,9 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
mixed4
particleVelm
=
velm[index]
;
mixed4
particleVelm
=
velm[index]
;
if
(
particleVelm.w
==
0
)
continue
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
velm[index]
=
particleVelm
;
if
(
particleVelm.w
!=
0
)
velm[index]
=
particleVelm
;
}
}
//
Evolve
the
free
ring
polymer
by
transforming
to
the
frequency
domain.
//
Evolve
the
free
ring
polymer
by
transforming
to
the
frequency
domain.
...
@@ -117,8 +115,6 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
...
@@ -117,8 +115,6 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
mixed4
particlePosq
=
posq[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed4
particlePosq
=
posq[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
if
(
particleVelm.w
==
0
)
continue
;
//
Forward
FFT.
//
Forward
FFT.
...
@@ -154,8 +150,10 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
...
@@ -154,8 +150,10 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
FFT_Q_BACKWARD
FFT_Q_BACKWARD
FFT_V_BACKWARD
FFT_V_BACKWARD
posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*qreal[indexInBlock].xyz
;
if
(
particleVelm.w
!=
0
)
{
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*vreal[indexInBlock].xyz
;
posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*qreal[indexInBlock].xyz
;
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz
=
SCALE*vreal[indexInBlock].xyz
;
}
}
}
}
}
...
@@ -172,10 +170,9 @@ __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mi
...
@@ -172,10 +170,9 @@ __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mi
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
mixed4
particleVelm
=
velm[index]
;
mixed4
particleVelm
=
velm[index]
;
if
(
particleVelm.w
==
0
)
continue
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
velm[index]
=
particleVelm
;
if
(
particleVelm.w
!=
0
)
velm[index]
=
particleVelm
;
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment