Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e06d55f2
Commit
e06d55f2
authored
Jun 30, 2015
by
peastman
Browse files
Fixed bug in RPMD contractions
parent
fa6025df
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
29 additions
and
8 deletions
+29
-8
plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
+1
-0
plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
+14
-4
plugins/rpmd/platforms/opencl/src/kernels/rpmdContraction.cl
plugins/rpmd/platforms/opencl/src/kernels/rpmdContraction.cl
+14
-4
No files found.
plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
View file @
e06d55f2
...
@@ -87,6 +87,7 @@ vector<string> RPMDIntegrator::getKernelNames() {
...
@@ -87,6 +87,7 @@ vector<string> RPMDIntegrator::getKernelNames() {
void
RPMDIntegrator
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
positions
)
{
void
RPMDIntegrator
::
setPositions
(
int
copy
,
const
vector
<
Vec3
>&
positions
)
{
kernel
.
getAs
<
IntegrateRPMDStepKernel
>
().
setPositions
(
copy
,
positions
);
kernel
.
getAs
<
IntegrateRPMDStepKernel
>
().
setPositions
(
copy
,
positions
);
forcesAreValid
=
false
;
hasSetPosition
=
true
;
hasSetPosition
=
true
;
}
}
...
...
plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
View file @
e06d55f2
...
@@ -23,13 +23,16 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
...
@@ -23,13 +23,16 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
__shared__
mixed3
q
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
q
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed2
w
[
NUM_COPIES
];
__shared__
mixed2
w1
[
NUM_COPIES
];
__shared__
mixed2
w2
[
NUM_CONTRACTED_COPIES
];
mixed3
*
qreal
=
&
q
[
blockStart
];
mixed3
*
qreal
=
&
q
[
blockStart
];
mixed3
*
qimag
=
&
q
[
blockStart
+
blockDim
.
x
];
mixed3
*
qimag
=
&
q
[
blockStart
+
blockDim
.
x
];
mixed3
*
tempreal
=
&
temp
[
blockStart
];
mixed3
*
tempreal
=
&
temp
[
blockStart
];
mixed3
*
tempimag
=
&
temp
[
blockStart
+
blockDim
.
x
];
mixed3
*
tempimag
=
&
temp
[
blockStart
+
blockDim
.
x
];
if
(
threadIdx
.
x
<
NUM_COPIES
)
if
(
threadIdx
.
x
<
NUM_COPIES
)
w
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
w1
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
if
(
threadIdx
.
x
<
NUM_CONTRACTED_COPIES
)
w2
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_CONTRACTED_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_CONTRACTED_COPIES
));
__syncthreads
();
__syncthreads
();
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
// Load the particle position.
// Load the particle position.
...
@@ -41,6 +44,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
...
@@ -41,6 +44,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
// Forward FFT.
// Forward FFT.
__syncthreads
();
__syncthreads
();
mixed2
*
w
=
w1
;
FFT_Q_FORWARD
FFT_Q_FORWARD
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
// Compress the data to remove high frequencies.
// Compress the data to remove high frequencies.
...
@@ -54,6 +58,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
...
@@ -54,6 +58,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
qimag
[
indexInBlock
]
=
tempimag
[
indexInBlock
<
start
?
indexInBlock
:
indexInBlock
+
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)];
qimag
[
indexInBlock
]
=
tempimag
[
indexInBlock
<
start
?
indexInBlock
:
indexInBlock
+
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)];
}
}
__syncthreads
();
__syncthreads
();
w
=
w2
;
FFT_Q_BACKWARD
FFT_Q_BACKWARD
}
}
...
@@ -74,13 +79,16 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
...
@@ -74,13 +79,16 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
const
mixed
forceScale
=
1
/
(
mixed
)
0x100000000
;
const
mixed
forceScale
=
1
/
(
mixed
)
0x100000000
;
__shared__
mixed3
f
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
f
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed2
w
[
NUM_COPIES
];
__shared__
mixed2
w1
[
NUM_COPIES
];
__shared__
mixed2
w2
[
NUM_CONTRACTED_COPIES
];
mixed3
*
freal
=
&
f
[
blockStart
];
mixed3
*
freal
=
&
f
[
blockStart
];
mixed3
*
fimag
=
&
f
[
blockStart
+
blockDim
.
x
];
mixed3
*
fimag
=
&
f
[
blockStart
+
blockDim
.
x
];
mixed3
*
tempreal
=
&
temp
[
blockStart
];
mixed3
*
tempreal
=
&
temp
[
blockStart
];
mixed3
*
tempimag
=
&
temp
[
blockStart
+
blockDim
.
x
];
mixed3
*
tempimag
=
&
temp
[
blockStart
+
blockDim
.
x
];
if
(
threadIdx
.
x
<
NUM_COPIES
)
if
(
threadIdx
.
x
<
NUM_COPIES
)
w
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
w1
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
if
(
threadIdx
.
x
<
NUM_CONTRACTED_COPIES
)
w2
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_CONTRACTED_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_CONTRACTED_COPIES
));
__syncthreads
();
__syncthreads
();
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
// Load the force.
// Load the force.
...
@@ -94,6 +102,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
...
@@ -94,6 +102,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
// Forward FFT.
// Forward FFT.
mixed2
*
w
=
w2
;
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
FFT_F_FORWARD
FFT_F_FORWARD
}
}
...
@@ -110,6 +119,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
...
@@ -110,6 +119,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
fimag
[
indexInBlock
]
=
(
indexInBlock
<
end
?
make_mixed3
(
0
)
:
tempimag
[
indexInBlock
-
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)]);
fimag
[
indexInBlock
]
=
(
indexInBlock
<
end
?
make_mixed3
(
0
)
:
tempimag
[
indexInBlock
-
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)]);
}
}
__syncthreads
();
__syncthreads
();
w
=
w1
;
FFT_F_BACKWARD
FFT_F_BACKWARD
// Store results.
// Store results.
...
...
plugins/rpmd/platforms/opencl/src/kernels/rpmdContraction.cl
View file @
e06d55f2
...
@@ -23,13 +23,16 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
...
@@ -23,13 +23,16 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
__local
mixed4
q[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
q[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
temp[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
temp[2*THREAD_BLOCK_SIZE]
;
__local
mixed2
w[NUM_COPIES]
;
__local
mixed2
w1[NUM_COPIES]
;
__local
mixed2
w2[NUM_CONTRACTED_COPIES]
;
__local
mixed4*
qreal
=
&q[blockStart]
;
__local
mixed4*
qreal
=
&q[blockStart]
;
__local
mixed4*
qimag
=
&q[blockStart+get_local_size
(
0
)
]
;
__local
mixed4*
qimag
=
&q[blockStart+get_local_size
(
0
)
]
;
__local
mixed4*
tempreal
=
&temp[blockStart]
;
__local
mixed4*
tempreal
=
&temp[blockStart]
;
__local
mixed4*
tempimag
=
&temp[blockStart+get_local_size
(
0
)
]
;
__local
mixed4*
tempimag
=
&temp[blockStart+get_local_size
(
0
)
]
;
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
w[indexInBlock]
=
(
mixed2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
w1[indexInBlock]
=
(
mixed2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
if
(
get_local_id
(
0
)
<
NUM_CONTRACTED_COPIES
)
w2[indexInBlock]
=
(
mixed2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
//
Load
the
particle
position.
//
Load
the
particle
position.
...
@@ -41,6 +44,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
...
@@ -41,6 +44,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
//
Forward
FFT.
//
Forward
FFT.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
__local
mixed2*
w
=
w1
;
FFT_Q_FORWARD
FFT_Q_FORWARD
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
//
Compress
the
data
to
remove
high
frequencies.
//
Compress
the
data
to
remove
high
frequencies.
...
@@ -54,6 +58,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
...
@@ -54,6 +58,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
qimag[indexInBlock]
=
tempimag[indexInBlock
<
start
?
indexInBlock
:
indexInBlock+
(
NUM_COPIES-NUM_CONTRACTED_COPIES
)
]
;
qimag[indexInBlock]
=
tempimag[indexInBlock
<
start
?
indexInBlock
:
indexInBlock+
(
NUM_COPIES-NUM_CONTRACTED_COPIES
)
]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
w
=
w2
;
FFT_Q_BACKWARD
FFT_Q_BACKWARD
}
}
...
@@ -73,13 +78,16 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
...
@@ -73,13 +78,16 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
__local
mixed4
f[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
f[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
temp[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
temp[2*THREAD_BLOCK_SIZE]
;
__local
mixed2
w[NUM_COPIES]
;
__local
mixed2
w1[NUM_COPIES]
;
__local
mixed2
w2[NUM_CONTRACTED_COPIES]
;
__local
mixed4*
freal
=
&f[blockStart]
;
__local
mixed4*
freal
=
&f[blockStart]
;
__local
mixed4*
fimag
=
&f[blockStart+get_local_size
(
0
)
]
;
__local
mixed4*
fimag
=
&f[blockStart+get_local_size
(
0
)
]
;
__local
mixed4*
tempreal
=
&temp[blockStart]
;
__local
mixed4*
tempreal
=
&temp[blockStart]
;
__local
mixed4*
tempimag
=
&temp[blockStart+get_local_size
(
0
)
]
;
__local
mixed4*
tempimag
=
&temp[blockStart+get_local_size
(
0
)
]
;
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
w[indexInBlock]
=
(
mixed2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
w1[indexInBlock]
=
(
mixed2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
if
(
get_local_id
(
0
)
<
NUM_CONTRACTED_COPIES
)
w2[indexInBlock]
=
(
mixed2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
//
Load
the
force.
//
Load
the
force.
...
@@ -93,6 +101,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
...
@@ -93,6 +101,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
//
Forward
FFT.
//
Forward
FFT.
__local
mixed2*
w
=
w2
;
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
FFT_F_FORWARD
FFT_F_FORWARD
}
}
...
@@ -109,6 +118,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
...
@@ -109,6 +118,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
fimag[indexInBlock]
=
(
indexInBlock
<
end
?
(
mixed4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
:
tempimag[indexInBlock-
(
NUM_COPIES-NUM_CONTRACTED_COPIES
)
]
)
;
fimag[indexInBlock]
=
(
indexInBlock
<
end
?
(
mixed4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
:
tempimag[indexInBlock-
(
NUM_COPIES-NUM_CONTRACTED_COPIES
)
]
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
w
=
w1
;
FFT_F_BACKWARD
FFT_F_BACKWARD
//
Store
results.
//
Store
results.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment