Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cd6af26e
Commit
cd6af26e
authored
Nov 16, 2012
by
Peter Eastman
Browse files
RPMD supports mixed and double precision
parent
b8b2e1ef
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
484 additions
and
352 deletions
+484
-352
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+143
-96
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
+1
-1
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
+80
-68
plugins/rpmd/platforms/cuda/tests/CMakeLists.txt
plugins/rpmd/platforms/cuda/tests/CMakeLists.txt
+5
-1
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
+3
-2
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+171
-125
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+2
-2
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
+70
-54
plugins/rpmd/platforms/opencl/tests/CMakeLists.txt
plugins/rpmd/platforms/opencl/tests/CMakeLists.txt
+5
-1
plugins/rpmd/platforms/opencl/tests/TestOpenCLRpmd.cpp
plugins/rpmd/platforms/opencl/tests/TestOpenCLRpmd.cpp
+4
-2
No files found.
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
View file @
cd6af26e
This diff is collapsed.
Click to expand it.
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
View file @
cd6af26e
...
@@ -91,7 +91,7 @@ private:
...
@@ -91,7 +91,7 @@ private:
CudaArray
*
forces
;
CudaArray
*
forces
;
CudaArray
*
positions
;
CudaArray
*
positions
;
CudaArray
*
velocities
;
CudaArray
*
velocities
;
CUfunction
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
CUfunction
pileKernel
,
stepKernel
,
velocitiesKernel
,
copy
Positions
ToContextKernel
,
copy
VelocitiesToContextKernel
,
copyForces
FromContextKernel
,
translateKernel
;
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
plugins/rpmd/platforms/cuda/src/kernels/rpmd.cu
View file @
cd6af26e
__device__
float
3
multiplyComplexRealPart
(
float
2
c1
,
float
3
c2r
,
float
3
c2i
)
{
__device__
mixed
3
multiplyComplexRealPart
(
mixed
2
c1
,
mixed
3
c2r
,
mixed
3
c2i
)
{
return
c1
.
x
*
c2r
-
c1
.
y
*
c2i
;
return
c1
.
x
*
c2r
-
c1
.
y
*
c2i
;
}
}
__device__
float
3
multiplyComplexImagPart
(
float
2
c1
,
float
3
c2r
,
float
3
c2i
)
{
__device__
mixed
3
multiplyComplexImagPart
(
mixed
2
c1
,
mixed
3
c2r
,
mixed
3
c2i
)
{
return
c1
.
x
*
c2i
+
c1
.
y
*
c2r
;
return
c1
.
x
*
c2i
+
c1
.
y
*
c2r
;
}
}
__device__
float
3
multiplyComplexRealPartConj
(
float
2
c1
,
float
3
c2r
,
float
3
c2i
)
{
__device__
mixed
3
multiplyComplexRealPartConj
(
mixed
2
c1
,
mixed
3
c2r
,
mixed
3
c2i
)
{
return
c1
.
x
*
c2r
+
c1
.
y
*
c2i
;
return
c1
.
x
*
c2r
+
c1
.
y
*
c2i
;
}
}
__device__
float
3
multiplyComplexImagPartConj
(
float
2
c1
,
float
3
c2r
,
float
3
c2i
)
{
__device__
mixed
3
multiplyComplexImagPartConj
(
mixed
2
c1
,
mixed
3
c2r
,
mixed
3
c2i
)
{
return
c1
.
x
*
c2i
-
c1
.
y
*
c2r
;
return
c1
.
x
*
c2i
-
c1
.
y
*
c2r
;
}
}
/**
/**
* Apply the PILE-L thermostat.
* Apply the PILE-L thermostat.
*/
*/
extern
"C"
__global__
void
applyPileThermostat
(
float
4
*
velm
,
float4
*
random
,
unsigned
int
randomIndex
,
extern
"C"
__global__
void
applyPileThermostat
(
mixed
4
*
velm
,
float4
*
random
,
unsigned
int
randomIndex
,
float
dt
,
float
kT
,
float
friction
)
{
mixed
dt
,
mixed
kT
,
mixed
friction
)
{
const
int
numBlocks
=
blockDim
.
x
*
gridDim
.
x
/
NUM_COPIES
;
const
int
numBlocks
=
blockDim
.
x
*
gridDim
.
x
/
NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
float
nkT
=
NUM_COPIES
*
kT
;
const
mixed
nkT
=
NUM_COPIES
*
kT
;
const
float
twown
=
2.0
f
*
nkT
/
HBAR
;
const
mixed
twown
=
2.0
f
*
nkT
/
HBAR
;
const
float
c1_0
=
EXP
(
-
0.5
f
*
dt
*
friction
);
const
mixed
c1_0
=
EXP
(
-
0.5
f
*
dt
*
friction
);
const
float
c2_0
=
SQRT
(
1.0
f
-
c1_0
*
c1_0
);
const
mixed
c2_0
=
SQRT
(
1.0
f
-
c1_0
*
c1_0
);
__shared__
float
3
v
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed
3
v
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
float
3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed
3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
float
2
w
[
NUM_COPIES
];
__shared__
mixed
2
w
[
NUM_COPIES
];
float
3
*
vreal
=
&
v
[
blockStart
];
mixed
3
*
vreal
=
&
v
[
blockStart
];
float
3
*
vimag
=
&
v
[
blockStart
+
blockDim
.
x
];
mixed
3
*
vimag
=
&
v
[
blockStart
+
blockDim
.
x
];
if
(
threadIdx
.
x
<
NUM_COPIES
)
if
(
threadIdx
.
x
<
NUM_COPIES
)
w
[
indexInBlock
]
=
make_
float
2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
w
[
indexInBlock
]
=
make_
mixed
2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
__syncthreads
();
__syncthreads
();
randomIndex
+=
NUM_COPIES
*
((
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
);
randomIndex
+=
NUM_COPIES
*
((
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
);
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
float
4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed
4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
float
invMass
=
particleVelm
.
w
;
mixed
invMass
=
particleVelm
.
w
;
float
c3_0
=
c2_0
*
SQRT
(
nkT
*
invMass
);
mixed
c3_0
=
c2_0
*
SQRT
(
nkT
*
invMass
);
// Forward FFT.
// Forward FFT.
vreal
[
indexInBlock
]
=
SCALE
*
make_
float
3
(
particleVelm
.
x
,
particleVelm
.
y
,
particleVelm
.
z
);
vreal
[
indexInBlock
]
=
SCALE
*
make_
mixed
3
(
particleVelm
.
x
,
particleVelm
.
y
,
particleVelm
.
z
);
vimag
[
indexInBlock
]
=
make_
float
3
(
0
);
vimag
[
indexInBlock
]
=
make_
mixed
3
(
0
);
__syncthreads
();
__syncthreads
();
FFT_V_FORWARD
FFT_V_FORWARD
...
@@ -53,28 +53,28 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns
...
@@ -53,28 +53,28 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns
// Apply a local Langevin thermostat to the centroid mode.
// Apply a local Langevin thermostat to the centroid mode.
float4
rand
=
random
[
randomIndex
];
float4
rand
=
random
[
randomIndex
];
vreal
[
0
]
=
vreal
[
0
]
*
c1_0
+
c3_0
*
make_
float
3
(
rand
.
x
,
rand
.
y
,
rand
.
z
);
vreal
[
0
]
=
vreal
[
0
]
*
c1_0
+
c3_0
*
make_
mixed
3
(
rand
.
x
,
rand
.
y
,
rand
.
z
);
}
}
else
{
else
{
// Use critical damping white noise for the remaining modes.
// Use critical damping white noise for the remaining modes.
int
k
=
(
indexInBlock
<=
NUM_COPIES
/
2
?
indexInBlock
:
NUM_COPIES
-
indexInBlock
);
int
k
=
(
indexInBlock
<=
NUM_COPIES
/
2
?
indexInBlock
:
NUM_COPIES
-
indexInBlock
);
const
bool
isCenter
=
(
NUM_COPIES
%
2
==
0
&&
k
==
NUM_COPIES
/
2
);
const
bool
isCenter
=
(
NUM_COPIES
%
2
==
0
&&
k
==
NUM_COPIES
/
2
);
const
float
wk
=
twown
*
sin
(
k
*
M_PI
/
NUM_COPIES
);
const
mixed
wk
=
twown
*
sin
(
k
*
M_PI
/
NUM_COPIES
);
const
float
c1
=
EXP
(
-
wk
*
dt
);
const
mixed
c1
=
EXP
(
-
wk
*
dt
);
const
float
c2
=
SQRT
((
1.0
f
-
c1
*
c1
)
/
2.0
f
)
*
(
isCenter
?
sqrt
(
2.0
f
)
:
1.0
f
);
const
mixed
c2
=
SQRT
((
1.0
f
-
c1
*
c1
)
/
2.0
f
)
*
(
isCenter
?
sqrt
(
2.0
f
)
:
1.0
f
);
const
float
c3
=
c2
*
SQRT
(
nkT
*
invMass
);
const
mixed
c3
=
c2
*
SQRT
(
nkT
*
invMass
);
float4
rand1
=
c3
*
random
[
randomIndex
+
k
];
float4
rand1
=
random
[
randomIndex
+
k
];
float4
rand2
=
(
isCenter
?
make_float4
(
0
)
:
c3
*
random
[
randomIndex
+
NUM_COPIES
-
k
]);
float4
rand2
=
(
isCenter
?
make_float4
(
0
)
:
random
[
randomIndex
+
NUM_COPIES
-
k
]);
vreal
[
indexInBlock
]
=
c1
*
vreal
[
indexInBlock
]
+
make_
float
3
(
rand1
.
x
,
rand1
.
y
,
rand1
.
z
);
vreal
[
indexInBlock
]
=
c1
*
vreal
[
indexInBlock
]
+
c3
*
make_
mixed
3
(
rand1
.
x
,
rand1
.
y
,
rand1
.
z
);
vimag
[
indexInBlock
]
=
c1
*
vimag
[
indexInBlock
]
+
(
indexInBlock
<
NUM_COPIES
/
2
?
make_
float
3
(
rand2
.
x
,
rand2
.
y
,
rand2
.
z
)
:
make_
float
3
(
-
rand2
.
x
,
-
rand2
.
y
,
-
rand2
.
z
));
vimag
[
indexInBlock
]
=
c1
*
vimag
[
indexInBlock
]
+
c3
*
(
indexInBlock
<
NUM_COPIES
/
2
?
make_
mixed
3
(
rand2
.
x
,
rand2
.
y
,
rand2
.
z
)
:
make_
mixed
3
(
-
rand2
.
x
,
-
rand2
.
y
,
-
rand2
.
z
));
}
}
__syncthreads
();
__syncthreads
();
// Inverse FFT.
// Inverse FFT.
FFT_V_BACKWARD
FFT_V_BACKWARD
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_
float
4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_
mixed
4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
...
@@ -82,24 +82,24 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns
...
@@ -82,24 +82,24 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns
/**
/**
* Advance the positions and velocities.
* Advance the positions and velocities.
*/
*/
extern
"C"
__global__
void
integrateStep
(
float
4
*
posq
,
float
4
*
velm
,
long
long
*
force
,
float
dt
,
float
kT
)
{
extern
"C"
__global__
void
integrateStep
(
mixed
4
*
posq
,
mixed
4
*
velm
,
long
long
*
force
,
mixed
dt
,
mixed
kT
)
{
const
int
numBlocks
=
(
blockDim
.
x
*
gridDim
.
x
)
/
NUM_COPIES
;
const
int
numBlocks
=
(
blockDim
.
x
*
gridDim
.
x
)
/
NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
float
nkT
=
NUM_COPIES
*
kT
;
const
mixed
nkT
=
NUM_COPIES
*
kT
;
const
float
twown
=
2.0
f
*
nkT
/
HBAR
;
const
mixed
twown
=
2.0
f
*
nkT
/
HBAR
;
const
float
forceScale
=
1
/
(
float
)
0xFFFFFFFF
;
const
mixed
forceScale
=
1
/
(
mixed
)
0xFFFFFFFF
;
__shared__
float
3
q
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed
3
q
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
float
3
v
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed
3
v
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
float
3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed
3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
float
2
w
[
NUM_COPIES
];
__shared__
mixed
2
w
[
NUM_COPIES
];
// Update velocities.
// Update velocities.
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
float
4
particleVelm
=
velm
[
index
];
mixed
4
particleVelm
=
velm
[
index
];
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
...
@@ -108,23 +108,23 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
...
@@ -108,23 +108,23 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
// Evolve the free ring polymer by transforming to the frequency domain.
// Evolve the free ring polymer by transforming to the frequency domain.
float
3
*
qreal
=
&
q
[
blockStart
];
mixed
3
*
qreal
=
&
q
[
blockStart
];
float
3
*
qimag
=
&
q
[
blockStart
+
blockDim
.
x
];
mixed
3
*
qimag
=
&
q
[
blockStart
+
blockDim
.
x
];
float
3
*
vreal
=
&
v
[
blockStart
];
mixed
3
*
vreal
=
&
v
[
blockStart
];
float
3
*
vimag
=
&
v
[
blockStart
+
blockDim
.
x
];
mixed
3
*
vimag
=
&
v
[
blockStart
+
blockDim
.
x
];
if
(
threadIdx
.
x
<
NUM_COPIES
)
if
(
threadIdx
.
x
<
NUM_COPIES
)
w
[
indexInBlock
]
=
make_
float
2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
w
[
indexInBlock
]
=
make_
mixed
2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
__syncthreads
();
__syncthreads
();
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
float
4
particlePosq
=
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed
4
particlePosq
=
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
float
4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
mixed
4
particleVelm
=
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
// Forward FFT.
// Forward FFT.
qreal
[
indexInBlock
]
=
SCALE
*
make_
float
3
(
particlePosq
.
x
,
particlePosq
.
y
,
particlePosq
.
z
);
qreal
[
indexInBlock
]
=
SCALE
*
make_
mixed
3
(
particlePosq
.
x
,
particlePosq
.
y
,
particlePosq
.
z
);
qimag
[
indexInBlock
]
=
make_
float
3
(
0
);
qimag
[
indexInBlock
]
=
make_
mixed
3
(
0
);
vreal
[
indexInBlock
]
=
SCALE
*
make_
float
3
(
particleVelm
.
x
,
particleVelm
.
y
,
particleVelm
.
z
);
vreal
[
indexInBlock
]
=
SCALE
*
make_
mixed
3
(
particleVelm
.
x
,
particleVelm
.
y
,
particleVelm
.
z
);
vimag
[
indexInBlock
]
=
make_
float
3
(
0
);
vimag
[
indexInBlock
]
=
make_
mixed
3
(
0
);
__syncthreads
();
__syncthreads
();
FFT_Q_FORWARD
FFT_Q_FORWARD
FFT_V_FORWARD
FFT_V_FORWARD
...
@@ -136,12 +136,12 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
...
@@ -136,12 +136,12 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
qimag
[
0
]
+=
vimag
[
0
]
*
dt
;
qimag
[
0
]
+=
vimag
[
0
]
*
dt
;
}
}
else
{
else
{
const
float
wk
=
twown
*
sin
(
indexInBlock
*
M_PI
/
NUM_COPIES
);
const
mixed
wk
=
twown
*
sin
(
indexInBlock
*
M_PI
/
NUM_COPIES
);
const
float
wt
=
wk
*
dt
;
const
mixed
wt
=
wk
*
dt
;
const
float
coswt
=
cos
(
wt
);
const
mixed
coswt
=
cos
(
wt
);
const
float
sinwt
=
sin
(
wt
);
const
mixed
sinwt
=
sin
(
wt
);
const
float
3
vprimereal
=
vreal
[
indexInBlock
]
*
coswt
-
qreal
[
indexInBlock
]
*
(
wk
*
sinwt
);
// Advance velocity from t to t+dt
const
mixed
3
vprimereal
=
vreal
[
indexInBlock
]
*
coswt
-
qreal
[
indexInBlock
]
*
(
wk
*
sinwt
);
// Advance velocity from t to t+dt
const
float
3
vprimeimag
=
vimag
[
indexInBlock
]
*
coswt
-
qimag
[
indexInBlock
]
*
(
wk
*
sinwt
);
const
mixed
3
vprimeimag
=
vimag
[
indexInBlock
]
*
coswt
-
qimag
[
indexInBlock
]
*
(
wk
*
sinwt
);
qreal
[
indexInBlock
]
=
vreal
[
indexInBlock
]
*
(
sinwt
/
wk
)
+
qreal
[
indexInBlock
]
*
coswt
;
// Advance position from t to t+dt
qreal
[
indexInBlock
]
=
vreal
[
indexInBlock
]
*
(
sinwt
/
wk
)
+
qreal
[
indexInBlock
]
*
coswt
;
// Advance position from t to t+dt
qimag
[
indexInBlock
]
=
vimag
[
indexInBlock
]
*
(
sinwt
/
wk
)
+
qimag
[
indexInBlock
]
*
coswt
;
qimag
[
indexInBlock
]
=
vimag
[
indexInBlock
]
*
(
sinwt
/
wk
)
+
qimag
[
indexInBlock
]
*
coswt
;
vreal
[
indexInBlock
]
=
vprimereal
;
vreal
[
indexInBlock
]
=
vprimereal
;
...
@@ -153,26 +153,26 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
...
@@ -153,26 +153,26 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
FFT_Q_BACKWARD
FFT_Q_BACKWARD
FFT_V_BACKWARD
FFT_V_BACKWARD
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_
float
4
(
SCALE
*
qreal
[
indexInBlock
].
x
,
SCALE
*
qreal
[
indexInBlock
].
y
,
SCALE
*
qreal
[
indexInBlock
].
z
,
particlePosq
.
w
);
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_
mixed
4
(
SCALE
*
qreal
[
indexInBlock
].
x
,
SCALE
*
qreal
[
indexInBlock
].
y
,
SCALE
*
qreal
[
indexInBlock
].
z
,
particlePosq
.
w
);
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_
float
4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
velm
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_
mixed
4
(
SCALE
*
vreal
[
indexInBlock
].
x
,
SCALE
*
vreal
[
indexInBlock
].
y
,
SCALE
*
vreal
[
indexInBlock
].
z
,
particleVelm
.
w
);
}
}
}
}
/**
/**
* Advance the velocities by a half step.
* Advance the velocities by a half step.
*/
*/
extern
"C"
__global__
void
advanceVelocities
(
float
4
*
velm
,
long
long
*
force
,
float
dt
)
{
extern
"C"
__global__
void
advanceVelocities
(
mixed
4
*
velm
,
long
long
*
force
,
mixed
dt
)
{
const
int
numBlocks
=
(
blockDim
.
x
*
gridDim
.
x
)
/
NUM_COPIES
;
const
int
numBlocks
=
(
blockDim
.
x
*
gridDim
.
x
)
/
NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
float
forceScale
=
1
/
(
float
)
0xFFFFFFFF
;
const
mixed
forceScale
=
1
/
(
mixed
)
0xFFFFFFFF
;
// Update velocities.
// Update velocities.
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
index
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
float
4
particleVelm
=
velm
[
index
];
mixed
4
particleVelm
=
velm
[
index
];
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
x
+=
forceScale
*
force
[
forceIndex
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
y
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
particleVelm
.
z
+=
forceScale
*
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
*
(
0.5
f
*
dt
*
particleVelm
.
w
);
...
@@ -181,9 +181,20 @@ extern "C" __global__ void advanceVelocities(float4* velm, long long* force, flo
...
@@ -181,9 +181,20 @@ extern "C" __global__ void advanceVelocities(float4* velm, long long* force, flo
}
}
/**
/**
* Copy a set of p
er-atom value
s from the integrator's arrays to the context.
* Copy a set of p
osition
s from the integrator's arrays to the context.
*/
*/
extern
"C"
__global__
void
copyToContext
(
float4
*
src
,
float4
*
dst
,
int
*
order
,
int
copy
)
{
extern
"C"
__global__
void
copyPositionsToContext
(
mixed4
*
src
,
real4
*
dst
,
int
*
order
,
int
copy
)
{
const
int
base
=
copy
*
PADDED_NUM_ATOMS
;
for
(
int
particle
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particle
<
NUM_ATOMS
;
particle
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
posq
=
src
[
base
+
order
[
particle
]];
dst
[
particle
]
=
make_real4
(
posq
.
x
,
posq
.
y
,
posq
.
z
,
posq
.
w
);
}
}
/**
* Copy a set of velocities from the integrator's arrays to the context.
*/
extern
"C"
__global__
void
copyVelocitiesToContext
(
mixed4
*
src
,
mixed4
*
dst
,
int
*
order
,
int
copy
)
{
const
int
base
=
copy
*
PADDED_NUM_ATOMS
;
const
int
base
=
copy
*
PADDED_NUM_ATOMS
;
for
(
int
particle
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particle
<
NUM_ATOMS
;
particle
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
particle
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particle
<
NUM_ATOMS
;
particle
+=
blockDim
.
x
*
gridDim
.
x
)
{
dst
[
particle
]
=
src
[
base
+
order
[
particle
]];
dst
[
particle
]
=
src
[
base
+
order
[
particle
]];
...
@@ -191,9 +202,9 @@ extern "C" __global__ void copyToContext(float4* src, float4* dst, int* order, i
...
@@ -191,9 +202,9 @@ extern "C" __global__ void copyToContext(float4* src, float4* dst, int* order, i
}
}
/**
/**
* Copy a set of
per-atom force valu
es from the context to the integrator's arrays.
* Copy a set of
forc
es from the context to the integrator's arrays.
*/
*/
extern
"C"
__global__
void
copyFromContext
(
long
long
*
src
,
long
long
*
dst
,
int
*
order
,
int
copy
)
{
extern
"C"
__global__
void
copyF
orcesF
romContext
(
long
long
*
src
,
long
long
*
dst
,
int
*
order
,
int
copy
)
{
const
int
base
=
copy
*
PADDED_NUM_ATOMS
*
3
;
const
int
base
=
copy
*
PADDED_NUM_ATOMS
*
3
;
for
(
int
particle
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particle
<
NUM_ATOMS
;
particle
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
particle
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particle
<
NUM_ATOMS
;
particle
+=
blockDim
.
x
*
gridDim
.
x
)
{
dst
[
base
+
order
[
particle
]]
=
src
[
particle
];
dst
[
base
+
order
[
particle
]]
=
src
[
particle
];
...
@@ -205,10 +216,11 @@ extern "C" __global__ void copyFromContext(long long* src, long long* dst, int*
...
@@ -205,10 +216,11 @@ extern "C" __global__ void copyFromContext(long long* src, long long* dst, int*
/**
/**
* Update atom positions so all copies are offset by the same number of periodic box widths.
* Update atom positions so all copies are offset by the same number of periodic box widths.
*/
*/
extern
"C"
__global__
void
applyCellTranslations
(
float
4
*
posq
,
float
4
*
movedPos
,
int
*
order
,
int
movedCopy
)
{
extern
"C"
__global__
void
applyCellTranslations
(
mixed
4
*
posq
,
real
4
*
movedPos
,
int
*
order
,
int
movedCopy
)
{
for
(
int
particle
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particle
<
NUM_ATOMS
;
particle
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
particle
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particle
<
NUM_ATOMS
;
particle
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
order
[
particle
];
int
index
=
order
[
particle
];
float4
delta
=
movedPos
[
particle
]
-
posq
[
movedCopy
*
PADDED_NUM_ATOMS
+
index
];
real4
p
=
movedPos
[
particle
];
mixed4
delta
=
make_mixed4
(
p
.
x
,
p
.
y
,
p
.
z
,
p
.
w
)
-
posq
[
movedCopy
*
PADDED_NUM_ATOMS
+
index
];
for
(
int
copy
=
0
;
copy
<
NUM_COPIES
;
copy
++
)
for
(
int
copy
=
0
;
copy
<
NUM_COPIES
;
copy
++
)
posq
[
copy
*
PADDED_NUM_ATOMS
+
index
]
+=
delta
;
posq
[
copy
*
PADDED_NUM_ATOMS
+
index
]
+=
delta
;
}
}
...
...
plugins/rpmd/platforms/cuda/tests/CMakeLists.txt
View file @
cd6af26e
...
@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS})
...
@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library
# Link with shared library
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_RPMD_TARGET
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_RPMD_TARGET
}
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ADD_TEST
(
${
TEST_ROOT
}
Single
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
single
)
IF
(
OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS
)
ADD_TEST
(
${
TEST_ROOT
}
Mixed
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
mixed
)
ADD_TEST
(
${
TEST_ROOT
}
Double
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
double
)
ENDIF
(
OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
View file @
cd6af26e
...
@@ -165,7 +165,6 @@ void testParaHydrogen() {
...
@@ -165,7 +165,6 @@ void testParaHydrogen() {
vector
<
int
>
counts
(
numBins
,
0
);
vector
<
int
>
counts
(
numBins
,
0
);
const
double
invBoxSize
=
1.0
/
boxSize
;
const
double
invBoxSize
=
1.0
/
boxSize
;
double
meanKE
=
0.0
;
double
meanKE
=
0.0
;
const
RealOpenMM
hbar
=
1.054571628e-34
*
AVOGADRO
/
(
1000
*
1e-12
);
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
integ
.
step
(
20
);
integ
.
step
(
20
);
vector
<
State
>
states
(
numCopies
);
vector
<
State
>
states
(
numCopies
);
...
@@ -221,9 +220,11 @@ void testParaHydrogen() {
...
@@ -221,9 +220,11 @@ void testParaHydrogen() {
ASSERT_USUALLY_EQUAL_TOL
(
60.0
,
1.5
*
temperature
+
meanKE
,
0.02
);
ASSERT_USUALLY_EQUAL_TOL
(
60.0
,
1.5
*
temperature
+
meanKE
,
0.02
);
}
}
int
main
()
{
int
main
(
int
argc
,
char
*
argv
[]
)
{
try
{
try
{
Platform
::
loadPluginsFromDirectory
(
Platform
::
getDefaultPluginsDirectory
());
Platform
::
loadPluginsFromDirectory
(
Platform
::
getDefaultPluginsDirectory
());
if
(
argc
>
1
)
Platform
::
getPlatformByName
(
"CUDA"
).
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
testFreeParticles
();
testFreeParticles
();
testParaHydrogen
();
testParaHydrogen
();
}
}
...
...
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
View file @
cd6af26e
This diff is collapsed.
Click to expand it.
plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
View file @
cd6af26e
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011 Stanford University and the Authors.
*
* Portions copyright (c) 2011
-2012
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -92,7 +92,7 @@ private:
...
@@ -92,7 +92,7 @@ private:
OpenCLArray
*
forces
;
OpenCLArray
*
forces
;
OpenCLArray
*
positions
;
OpenCLArray
*
positions
;
OpenCLArray
*
velocities
;
OpenCLArray
*
velocities
;
cl
::
Kernel
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
cl
::
Kernel
pileKernel
,
stepKernel
,
velocitiesKernel
,
copy
Positions
ToContextKernel
,
copy
VelocitiesToContextKernel
,
copyForces
FromContextKernel
,
translateKernel
;
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
View file @
cd6af26e
float
4
multiplyComplexRealPart
(
float
2
c1,
float
4
c2r,
float
4
c2i
)
{
mixed
4
multiplyComplexRealPart
(
mixed
2
c1,
mixed
4
c2r,
mixed
4
c2i
)
{
return
c1.x*c2r-c1.y*c2i
;
return
c1.x*c2r-c1.y*c2i
;
}
}
float
4
multiplyComplexImagPart
(
float
2
c1,
float
4
c2r,
float
4
c2i
)
{
mixed
4
multiplyComplexImagPart
(
mixed
2
c1,
mixed
4
c2r,
mixed
4
c2i
)
{
return
c1.x*c2i+c1.y*c2r
;
return
c1.x*c2i+c1.y*c2r
;
}
}
float
4
multiplyComplexRealPartConj
(
float
2
c1,
float
4
c2r,
float
4
c2i
)
{
mixed
4
multiplyComplexRealPartConj
(
mixed
2
c1,
mixed
4
c2r,
mixed
4
c2i
)
{
return
c1.x*c2r+c1.y*c2i
;
return
c1.x*c2r+c1.y*c2i
;
}
}
float
4
multiplyComplexImagPartConj
(
float
2
c1,
float
4
c2r,
float
4
c2i
)
{
mixed
4
multiplyComplexImagPartConj
(
mixed
2
c1,
mixed
4
c2r,
mixed
4
c2i
)
{
return
c1.x*c2i-c1.y*c2r
;
return
c1.x*c2i-c1.y*c2r
;
}
}
/**
/**
*
Apply
the
PILE-L
thermostat.
*
Apply
the
PILE-L
thermostat.
*/
*/
__kernel
void
applyPileThermostat
(
__global
float4*
velm,
__local
float4*
v,
__local
float4*
temp,
__local
float2*
w
,
__global
float4*
random,
unsigned
int
randomIndex,
__kernel
void
applyPileThermostat
(
__global
mixed4*
velm
,
__global
float4*
random,
unsigned
int
randomIndex,
float
dt,
float
kT,
float
friction
)
{
mixed
dt,
mixed
kT,
mixed
friction
)
{
const
int
numBlocks
=
get_global_size
(
0
)
/NUM_COPIES
;
const
int
numBlocks
=
get_global_size
(
0
)
/NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES*
(
get_local_id
(
0
)
/NUM_COPIES
)
;
const
int
blockStart
=
NUM_COPIES*
(
get_local_id
(
0
)
/NUM_COPIES
)
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
const
float
nkT
=
NUM_COPIES*kT
;
const
mixed
nkT
=
NUM_COPIES*kT
;
const
float
twown
=
2.0f*nkT/HBAR
;
const
mixed
twown
=
2.0f*nkT/HBAR
;
const
float
c1_0
=
EXP
(
-0.5f*dt*friction
)
;
const
mixed
c1_0
=
exp
(
-0.5f*dt*friction
)
;
const
float
c2_0
=
SQRT
(
1.0f-c1_0*c1_0
)
;
const
mixed
c2_0
=
sqrt
(
1.0f-c1_0*c1_0
)
;
__local
float4*
vreal
=
&v[blockStart]
;
__local
mixed4
v[2*THREAD_BLOCK_SIZE]
;
__local
float4*
vimag
=
&v[blockStart+get_local_size
(
0
)
]
;
__local
mixed4
temp[2*THREAD_BLOCK_SIZE]
;
__local
mixed2
w[NUM_COPIES]
;
__local
mixed4*
vreal
=
&v[blockStart]
;
__local
mixed4*
vimag
=
&v[blockStart+get_local_size
(
0
)
]
;
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
w[indexInBlock]
=
(
float
2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
w[indexInBlock]
=
(
mixed
2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
randomIndex
+=
NUM_COPIES*
(
get_global_id
(
0
)
/NUM_COPIES
)
;
randomIndex
+=
NUM_COPIES*
(
get_global_id
(
0
)
/NUM_COPIES
)
;
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
float
4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed
4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
float
invMass
=
particleVelm.w
;
mixed
invMass
=
particleVelm.w
;
float
c3_0
=
c2_0*
SQRT
(
nkT*invMass
)
;
mixed
c3_0
=
c2_0*
sqrt
(
nkT*invMass
)
;
//
Forward
FFT.
//
Forward
FFT.
vreal[indexInBlock]
=
SCALE*particleVelm
;
vreal[indexInBlock]
=
SCALE*particleVelm
;
vimag[indexInBlock]
=
(
float
4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
;
vimag[indexInBlock]
=
(
mixed
4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
;
barrier
(
CLK_GLOBAL_MEM_FENCE
)
;
barrier
(
CLK_GLOBAL_MEM_FENCE
)
;
FFT_V_FORWARD
FFT_V_FORWARD
...
@@ -49,19 +52,19 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo
...
@@ -49,19 +52,19 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo
if
(
indexInBlock
==
0
)
{
if
(
indexInBlock
==
0
)
{
//
Apply
a
local
Langevin
thermostat
to
the
centroid
mode.
//
Apply
a
local
Langevin
thermostat
to
the
centroid
mode.
vreal[0].xyz
=
vreal[0].xyz*c1_0
+
c3_0*random[randomIndex].xyz
;
vreal[0].xyz
=
vreal[0].xyz*c1_0
+
c3_0*
convert_mixed4
(
random[randomIndex]
)
.
xyz
;
}
}
else
{
else
{
//
Use
critical
damping
white
noise
for
the
remaining
modes.
//
Use
critical
damping
white
noise
for
the
remaining
modes.
int
k
=
(
indexInBlock
<=
NUM_COPIES/2
?
indexInBlock
:
NUM_COPIES-indexInBlock
)
;
int
k
=
(
indexInBlock
<=
NUM_COPIES/2
?
indexInBlock
:
NUM_COPIES-indexInBlock
)
;
const
bool
isCenter
=
(
NUM_COPIES%2
==
0
&&
k
==
NUM_COPIES/2
)
;
const
bool
isCenter
=
(
NUM_COPIES%2
==
0
&&
k
==
NUM_COPIES/2
)
;
const
float
wk
=
twown*sin
(
k*M_PI/NUM_COPIES
)
;
const
mixed
wk
=
twown*sin
(
k*M_PI/NUM_COPIES
)
;
const
float
c1
=
EXP
(
-wk*dt
)
;
const
mixed
c1
=
exp
(
-wk*dt
)
;
const
float
c2
=
SQRT
((
1.0f-c1*c1
)
/2.0f
)
*
(
isCenter
?
sqrt
(
2.0f
)
:
1.0f
)
;
const
mixed
c2
=
sqrt
((
1.0f-c1*c1
)
/2.0f
)
*
(
isCenter
?
sqrt
(
2.0f
)
:
1.0f
)
;
const
float
c3
=
c2*
SQRT
(
nkT*invMass
)
;
const
mixed
c3
=
c2*
sqrt
(
nkT*invMass
)
;
float
4
rand1
=
c3*random[randomIndex+k]
;
mixed
4
rand1
=
c3*
convert_mixed4
(
random[randomIndex+k]
)
;
float
4
rand2
=
(
isCenter
?
0.0f
:
c3*random[randomIndex+NUM_COPIES-k]
)
;
mixed
4
rand2
=
(
isCenter
?
0.0f
:
c3*
convert_mixed4
(
random[randomIndex+NUM_COPIES-k]
)
)
;
vreal[indexInBlock].xyz
=
c1*vreal[indexInBlock].xyz
+
rand1.xyz
;
vreal[indexInBlock].xyz
=
c1*vreal[indexInBlock].xyz
+
rand1.xyz
;
vimag[indexInBlock].xyz
=
c1*vimag[indexInBlock].xyz
+
(
indexInBlock
<
NUM_COPIES/2
?
rand2.xyz
:
-rand2.xyz
)
;
vimag[indexInBlock].xyz
=
c1*vimag[indexInBlock].xyz
+
(
indexInBlock
<
NUM_COPIES/2
?
rand2.xyz
:
-rand2.xyz
)
;
}
}
...
@@ -78,42 +81,45 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo
...
@@ -78,42 +81,45 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo
/**
/**
*
Advance
the
positions
and
velocities.
*
Advance
the
positions
and
velocities.
*/
*/
__kernel
void
integrateStep
(
__global
float4*
posq,
__global
float4*
velm,
__global
float4*
force,
__kernel
void
integrateStep
(
__global
mixed4*
posq,
__global
mixed4*
velm,
__global
real4*
force,
mixed
dt,
mixed
kT
)
{
__local
float4*
q,
__local
float4*
v,
__local
float4*
temp,
__local
float2*
w,
float
dt,
float
kT
)
{
const
int
numBlocks
=
get_global_size
(
0
)
/NUM_COPIES
;
const
int
numBlocks
=
get_global_size
(
0
)
/NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES*
(
get_local_id
(
0
)
/NUM_COPIES
)
;
const
int
blockStart
=
NUM_COPIES*
(
get_local_id
(
0
)
/NUM_COPIES
)
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
const
float
nkT
=
NUM_COPIES*kT
;
const
mixed
nkT
=
NUM_COPIES*kT
;
const
float
twown
=
2.0f*nkT/HBAR
;
const
mixed
twown
=
2.0f*nkT/HBAR
;
__local
mixed4
q[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
v[2*THREAD_BLOCK_SIZE]
;
__local
mixed4
temp[2*THREAD_BLOCK_SIZE]
;
__local
mixed2
w[NUM_COPIES]
;
//
Update
velocities.
//
Update
velocities.
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
float
4
particleVelm
=
velm[index]
;
mixed
4
particleVelm
=
velm[index]
;
particleVelm.xyz
+=
force[index].xyz*
(
0.5f*dt*particleVelm.w
)
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
velm[index]
=
particleVelm
;
velm[index]
=
particleVelm
;
}
}
//
Evolve
the
free
ring
polymer
by
transforming
to
the
frequency
domain.
//
Evolve
the
free
ring
polymer
by
transforming
to
the
frequency
domain.
__local
float
4*
qreal
=
&q[blockStart]
;
__local
mixed
4*
qreal
=
&q[blockStart]
;
__local
float
4*
qimag
=
&q[blockStart+get_local_size
(
0
)
]
;
__local
mixed
4*
qimag
=
&q[blockStart+get_local_size
(
0
)
]
;
__local
float
4*
vreal
=
&v[blockStart]
;
__local
mixed
4*
vreal
=
&v[blockStart]
;
__local
float
4*
vimag
=
&v[blockStart+get_local_size
(
0
)
]
;
__local
mixed
4*
vimag
=
&v[blockStart+get_local_size
(
0
)
]
;
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
if
(
get_local_id
(
0
)
<
NUM_COPIES
)
w[indexInBlock]
=
(
float
2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
w[indexInBlock]
=
(
mixed
2
)
(
cos
(
-indexInBlock*2*M_PI/NUM_COPIES
)
,
sin
(
-indexInBlock*2*M_PI/NUM_COPIES
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
float
4
particlePosq
=
posq[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed
4
particlePosq
=
posq[particle+indexInBlock*PADDED_NUM_ATOMS]
;
float
4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
mixed
4
particleVelm
=
velm[particle+indexInBlock*PADDED_NUM_ATOMS]
;
//
Forward
FFT.
//
Forward
FFT.
qreal[indexInBlock]
=
SCALE*particlePosq
;
qreal[indexInBlock]
=
SCALE*particlePosq
;
qimag[indexInBlock]
=
(
float
4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
;
qimag[indexInBlock]
=
(
mixed
4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
;
vreal[indexInBlock]
=
SCALE*particleVelm
;
vreal[indexInBlock]
=
SCALE*particleVelm
;
vimag[indexInBlock]
=
(
float
4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
;
vimag[indexInBlock]
=
(
mixed
4
)
(
0.0f,
0.0f,
0.0f,
0.0f
)
;
barrier
(
CLK_GLOBAL_MEM_FENCE
)
;
barrier
(
CLK_GLOBAL_MEM_FENCE
)
;
FFT_Q_FORWARD
FFT_Q_FORWARD
FFT_V_FORWARD
FFT_V_FORWARD
...
@@ -125,12 +131,12 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
...
@@ -125,12 +131,12 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
qimag[0].xyz
+=
vimag[0].xyz*dt
;
qimag[0].xyz
+=
vimag[0].xyz*dt
;
}
}
else
{
else
{
const
float
wk
=
twown*sin
(
indexInBlock*M_PI/NUM_COPIES
)
;
const
mixed
wk
=
twown*sin
(
indexInBlock*M_PI/NUM_COPIES
)
;
const
float
wt
=
wk*dt
;
const
mixed
wt
=
wk*dt
;
const
float
coswt
=
cos
(
wt
)
;
const
mixed
coswt
=
cos
(
wt
)
;
const
float
sinwt
=
sin
(
wt
)
;
const
mixed
sinwt
=
sin
(
wt
)
;
const
float
4
vprimereal
=
vreal[indexInBlock]*coswt
-
qreal[indexInBlock]*
(
wk*sinwt
)
; // Advance velocity from t to t+dt
const
mixed
4
vprimereal
=
vreal[indexInBlock]*coswt
-
qreal[indexInBlock]*
(
wk*sinwt
)
; // Advance velocity from t to t+dt
const
float
4
vprimeimag
=
vimag[indexInBlock]*coswt
-
qimag[indexInBlock]*
(
wk*sinwt
)
;
const
mixed
4
vprimeimag
=
vimag[indexInBlock]*coswt
-
qimag[indexInBlock]*
(
wk*sinwt
)
;
qreal[indexInBlock]
=
vreal[indexInBlock]*
(
sinwt/wk
)
+
qreal[indexInBlock]*coswt
; // Advance position from t to t+dt
qreal[indexInBlock]
=
vreal[indexInBlock]*
(
sinwt/wk
)
+
qreal[indexInBlock]*coswt
; // Advance position from t to t+dt
qimag[indexInBlock]
=
vimag[indexInBlock]*
(
sinwt/wk
)
+
qimag[indexInBlock]*coswt
;
qimag[indexInBlock]
=
vimag[indexInBlock]*
(
sinwt/wk
)
+
qimag[indexInBlock]*coswt
;
vreal[indexInBlock]
=
vprimereal
;
vreal[indexInBlock]
=
vprimereal
;
...
@@ -150,7 +156,7 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
...
@@ -150,7 +156,7 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
/**
/**
*
Advance
the
velocities
by
a
half
step.
*
Advance
the
velocities
by
a
half
step.
*/
*/
__kernel
void
advanceVelocities
(
__global
float
4*
velm,
__global
float
4*
force,
float
dt
)
{
__kernel
void
advanceVelocities
(
__global
mixed
4*
velm,
__global
real
4*
force,
mixed
dt
)
{
const
int
numBlocks
=
get_global_size
(
0
)
/NUM_COPIES
;
const
int
numBlocks
=
get_global_size
(
0
)
/NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES*
(
get_local_id
(
0
)
/NUM_COPIES
)
;
const
int
blockStart
=
NUM_COPIES*
(
get_local_id
(
0
)
/NUM_COPIES
)
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
const
int
indexInBlock
=
get_local_id
(
0
)
-blockStart
;
...
@@ -159,16 +165,26 @@ __kernel void advanceVelocities(__global float4* velm, __global float4* force, f
...
@@ -159,16 +165,26 @@ __kernel void advanceVelocities(__global float4* velm, __global float4* force, f
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
for
(
int
particle
=
get_global_id
(
0
)
/NUM_COPIES
; particle < NUM_ATOMS; particle += numBlocks) {
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
int
index
=
particle+indexInBlock*PADDED_NUM_ATOMS
;
float
4
particleVelm
=
velm[index]
;
mixed
4
particleVelm
=
velm[index]
;
particleVelm.xyz
+=
force[index].xyz*
(
0.5f*dt*particleVelm.w
)
;
particleVelm.xyz
+=
convert_mixed4
(
force[index]
)
.
xyz*
(
0.5f*dt*particleVelm.w
)
;
velm[index]
=
particleVelm
;
velm[index]
=
particleVelm
;
}
}
}
}
/**
/**
*
Copy
a
set
of
p
er-atom
value
s
from
the
integrator
's
arrays
to
the
context.
*
Copy
a
set
of
p
osition
s
from
the
integrator
's
arrays
to
the
context.
*/
*/
__kernel
void
copyToContext
(
__global
float4*
src,
__global
float4*
dst,
__global
int*
order,
int
copy
)
{
__kernel
void
copyPositionsToContext
(
__global
mixed4*
src,
__global
real4*
dst,
__global
int*
order,
int
copy
)
{
const
int
base
=
copy*PADDED_NUM_ATOMS
;
for
(
int
particle
=
get_global_id
(
0
)
; particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[particle]
=
convert_real4
(
src[base+order[particle]]
)
;
}
}
/**
*
Copy
a
set
of
velocities
from
the
integrator
's
arrays
to
the
context.
*/
__kernel
void
copyVelocitiesToContext
(
__global
mixed4*
src,
__global
mixed4*
dst,
__global
int*
order,
int
copy
)
{
const
int
base
=
copy*PADDED_NUM_ATOMS
;
const
int
base
=
copy*PADDED_NUM_ATOMS
;
for
(
int
particle
=
get_global_id
(
0
)
; particle < NUM_ATOMS; particle += get_global_size(0)) {
for
(
int
particle
=
get_global_id
(
0
)
; particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[particle]
=
src[base+order[particle]]
;
dst[particle]
=
src[base+order[particle]]
;
...
@@ -176,9 +192,9 @@ __kernel void copyToContext(__global float4* src, __global float4* dst, __global
...
@@ -176,9 +192,9 @@ __kernel void copyToContext(__global float4* src, __global float4* dst, __global
}
}
/**
/**
*
Copy
a
set
of
per-atom
valu
es
from
the
context
to
the
integrator
's
arrays.
*
Copy
a
set
forc
es
from
the
context
to
the
integrator
's
arrays.
*/
*/
__kernel
void
copyFromContext
(
__global
float
4*
src,
__global
float
4*
dst,
__global
int*
order,
int
copy
)
{
__kernel
void
copyF
orcesF
romContext
(
__global
real
4*
src,
__global
real
4*
dst,
__global
int*
order,
int
copy
)
{
const
int
base
=
copy*PADDED_NUM_ATOMS
;
const
int
base
=
copy*PADDED_NUM_ATOMS
;
for
(
int
particle
=
get_global_id
(
0
)
; particle < NUM_ATOMS; particle += get_global_size(0)) {
for
(
int
particle
=
get_global_id
(
0
)
; particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[base+order[particle]]
=
src[particle]
;
dst[base+order[particle]]
=
src[particle]
;
...
@@ -188,10 +204,10 @@ __kernel void copyFromContext(__global float4* src, __global float4* dst, __glob
...
@@ -188,10 +204,10 @@ __kernel void copyFromContext(__global float4* src, __global float4* dst, __glob
/**
/**
*
Update
atom
positions
so
all
copies
are
offset
by
the
same
number
of
periodic
box
widths.
*
Update
atom
positions
so
all
copies
are
offset
by
the
same
number
of
periodic
box
widths.
*/
*/
__kernel
void
applyCellTranslations
(
__global
float
4*
posq,
__global
float
4*
movedPos,
__global
int*
order,
int
movedCopy
)
{
__kernel
void
applyCellTranslations
(
__global
mixed
4*
posq,
__global
real
4*
movedPos,
__global
int*
order,
int
movedCopy
)
{
for
(
int
particle
=
get_global_id
(
0
)
; particle < NUM_ATOMS; particle += get_global_size(0)) {
for
(
int
particle
=
get_global_id
(
0
)
; particle < NUM_ATOMS; particle += get_global_size(0)) {
int
index
=
order[particle]
;
int
index
=
order[particle]
;
float
4
delta
=
movedPos[particle]-posq[movedCopy*PADDED_NUM_ATOMS+index]
;
mixed
4
delta
=
convert_mixed4
(
movedPos[particle]
)
-posq[movedCopy*PADDED_NUM_ATOMS+index]
;
for
(
int
copy
=
0
; copy < NUM_COPIES; copy++)
for
(
int
copy
=
0
; copy < NUM_COPIES; copy++)
posq[copy*PADDED_NUM_ATOMS+index]
+=
delta
;
posq[copy*PADDED_NUM_ATOMS+index]
+=
delta
;
}
}
...
...
plugins/rpmd/platforms/opencl/tests/CMakeLists.txt
View file @
cd6af26e
...
@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS})
...
@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library
# Link with shared library
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_RPMD_TARGET
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_RPMD_TARGET
}
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ADD_TEST
(
${
TEST_ROOT
}
Single
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
single
)
IF
(
OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS
)
ADD_TEST
(
${
TEST_ROOT
}
Mixed
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
mixed
)
ADD_TEST
(
${
TEST_ROOT
}
Double
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
double
)
ENDIF
(
OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
plugins/rpmd/platforms/opencl/tests/TestOpenCLRpmd.cpp
View file @
cd6af26e
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011 Stanford University and the Authors.
*
* Portions copyright (c) 2011
-2012
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -221,9 +221,11 @@ void testParaHydrogen() {
...
@@ -221,9 +221,11 @@ void testParaHydrogen() {
ASSERT_USUALLY_EQUAL_TOL
(
60.0
,
1.5
*
temperature
+
meanKE
,
0.02
);
ASSERT_USUALLY_EQUAL_TOL
(
60.0
,
1.5
*
temperature
+
meanKE
,
0.02
);
}
}
int
main
()
{
int
main
(
int
argc
,
char
*
argv
[]
)
{
try
{
try
{
Platform
::
loadPluginsFromDirectory
(
Platform
::
getDefaultPluginsDirectory
());
Platform
::
loadPluginsFromDirectory
(
Platform
::
getDefaultPluginsDirectory
());
if
(
argc
>
1
)
Platform
::
getPlatformByName
(
"OpenCL"
).
setPropertyDefaultValue
(
"OpenCLPrecision"
,
string
(
argv
[
1
]));
testFreeParticles
();
testFreeParticles
();
testParaHydrogen
();
testParaHydrogen
();
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment