Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8725cede
"vscode:/vscode.git/clone" did not exist on "27777280f602cd887c19e890d91ddc72cd8661a0"
Commit
8725cede
authored
Apr 22, 2010
by
Peter Eastman
Browse files
Converted Cuda platform to use improved Langevin integrator
parent
15a8478e
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
71 additions
and
243 deletions
+71
-243
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+0
-6
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+2
-7
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+11
-76
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+0
-2
platforms/cuda/src/kernels/kBrownianUpdate.cu
platforms/cuda/src/kernels/kBrownianUpdate.cu
+18
-18
platforms/cuda/src/kernels/kCalculateAndersenThermostat.cu
platforms/cuda/src/kernels/kCalculateAndersenThermostat.cu
+1
-1
platforms/cuda/src/kernels/kLangevinUpdate.cu
platforms/cuda/src/kernels/kLangevinUpdate.cu
+16
-57
platforms/cuda/src/kernels/kLangevinUpdate.h
platforms/cuda/src/kernels/kLangevinUpdate.h
+12
-65
platforms/cuda/src/kernels/kRandom.cu
platforms/cuda/src/kernels/kRandom.cu
+11
-11
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
8725cede
...
...
@@ -869,9 +869,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
prevStepSize
=
stepSize
;
}
kLangevinUpdatePart1
(
gpu
);
kApplyFirstShake
(
gpu
);
kApplyFirstSettle
(
gpu
);
kApplyFirstCCMA
(
gpu
);
if
(
data
.
removeCM
)
if
(
data
.
stepCount
%
data
.
cmMotionFrequency
==
0
)
gpu
->
bCalculateCM
=
true
;
...
...
@@ -991,9 +988,6 @@ void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, cons
float
maxStepSize
=
(
float
)(
maxTime
-
data
.
time
);
kSelectLangevinStepSize
(
gpu
,
maxStepSize
);
kLangevinUpdatePart1
(
gpu
);
kApplyFirstShake
(
gpu
);
kApplyFirstSettle
(
gpu
);
kApplyFirstCCMA
(
gpu
);
if
(
data
.
removeCM
)
if
(
data
.
stepCount
%
data
.
cmMotionFrequency
==
0
)
gpu
->
bCalculateCM
=
true
;
...
...
platforms/cuda/src/kernels/cudatypes.h
View file @
8725cede
...
...
@@ -471,8 +471,6 @@ struct cudaGmxSimulation {
float4
*
pPosqP
;
// Pointer to mid-integration atom positions
float4
*
pOldPosq
;
// Pointer to old atom positions
float4
*
pVelm4
;
// Pointer to atom velocity and inverse mass
float4
*
pvVector4
;
// Pointer to atom v Vector
float4
*
pxVector4
;
// Pointer to atom x Vector
float4
*
pForce4
;
// Pointer to force data
float
*
pEnergy
;
// Pointer to energy output buffer
float
*
pBornForce
;
// Pointer to Born force data
...
...
@@ -482,15 +480,12 @@ struct cudaGmxSimulation {
float4
*
pLinearMomentum
;
// Pointer to linear momentum
// Random numbers
float4
*
pRandom4a
;
// Pointer to first set of 4 random numbers
float4
*
pRandom4b
;
// Pointer to second set of 4 random numbers
float2
*
pRandom2a
;
// Pointer to first set of 2 random numbers
float2
*
pRandom2b
;
// Pointer to second set of 2 random numbers
float4
*
pRandom4
;
// Pointer to 4 random numbers
float2
*
pRandom2
;
// Pointer to 2 random numbers
uint4
*
pRandomSeed
;
// Pointer to random seeds
int
*
pRandomPosition
;
// Pointer to random number positions
unsigned
int
randoms
;
// Number of randoms
unsigned
int
totalRandoms
;
// Number of randoms plus overflow.
unsigned
int
totalRandomsTimesTwo
;
// Used for generating randoms
unsigned
int
randomIterations
;
// Number of iterations before regenerating randoms
unsigned
int
randomFrames
;
// Number of frames of random numbers
};
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
8725cede
...
...
@@ -1651,10 +1651,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu
->
sim
.
pOldPosq
=
gpu
->
psOldPosq4
->
_pDevStream
[
0
];
gpu
->
psVelm4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"Velm"
);
gpu
->
sim
.
pVelm4
=
gpu
->
psVelm4
->
_pDevStream
[
0
];
gpu
->
psvVector4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"vVector"
);
gpu
->
sim
.
pvVector4
=
gpu
->
psvVector4
->
_pDevStream
[
0
];
gpu
->
psxVector4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"xVector"
);
gpu
->
sim
.
pxVector4
=
gpu
->
psxVector4
->
_pDevStream
[
0
];
gpu
->
psBornRadii
=
new
CUDAStream
<
float
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"BornRadii"
);
gpu
->
sim
.
pBornRadii
=
gpu
->
psBornRadii
->
_pDevStream
[
0
];
gpu
->
psObcChain
=
new
CUDAStream
<
float
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"ObcChain"
);
...
...
@@ -1669,7 +1665,7 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu
->
sim
.
pStepSize
=
gpu
->
psStepSize
->
_pDevStream
[
0
];
(
*
gpu
->
psStepSize
)[
0
]
=
make_float2
(
0.0
f
,
0.0
f
);
gpu
->
psStepSize
->
Upload
();
gpu
->
psLangevinParameters
=
new
CUDAStream
<
float
>
(
11
,
1
,
"LangevinParameters"
);
gpu
->
psLangevinParameters
=
new
CUDAStream
<
float
>
(
3
,
1
,
"LangevinParameters"
);
gpu
->
sim
.
pLangevinParameters
=
gpu
->
psLangevinParameters
->
_pDevStream
[
0
];
gpu
->
pAtomSymbol
=
new
unsigned
char
[
gpu
->
natoms
];
gpu
->
psAtomIndex
=
new
CUDAStream
<
int
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"AtomIndex"
);
...
...
@@ -1684,15 +1680,12 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu
->
sim
.
randomIterations
=
gpu
->
sim
.
randomFrames
;
gpu
->
sim
.
randoms
=
gpu
->
sim
.
randomFrames
*
gpu
->
sim
.
paddedNumberOfAtoms
;
gpu
->
sim
.
totalRandoms
=
gpu
->
sim
.
randoms
+
gpu
->
sim
.
paddedNumberOfAtoms
;
gpu
->
sim
.
totalRandomsTimesTwo
=
gpu
->
sim
.
totalRandoms
*
2
;
gpu
->
psRandom4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
totalRandomsTimesTwo
,
1
,
"Random4"
);
gpu
->
psRandom2
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
totalRandomsTimesTwo
,
1
,
"Random2"
);
gpu
->
psRandom4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
totalRandoms
,
1
,
"Random4"
);
gpu
->
psRandom2
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
totalRandoms
,
1
,
"Random2"
);
gpu
->
psRandomPosition
=
new
CUDAStream
<
int
>
(
gpu
->
sim
.
blocks
,
1
,
"RandomPosition"
);
gpu
->
psRandomSeed
=
new
CUDAStream
<
uint4
>
(
gpu
->
sim
.
blocks
*
gpu
->
sim
.
random_threads_per_block
,
1
,
"RandomSeed"
);
gpu
->
sim
.
pRandom4a
=
gpu
->
psRandom4
->
_pDevStream
[
0
];
gpu
->
sim
.
pRandom2a
=
gpu
->
psRandom2
->
_pDevStream
[
0
];
gpu
->
sim
.
pRandom4b
=
gpu
->
psRandom4
->
_pDevStream
[
0
]
+
gpu
->
sim
.
totalRandoms
;
gpu
->
sim
.
pRandom2b
=
gpu
->
psRandom2
->
_pDevStream
[
0
]
+
gpu
->
sim
.
totalRandoms
;
gpu
->
sim
.
pRandom4
=
gpu
->
psRandom4
->
_pDevStream
[
0
];
gpu
->
sim
.
pRandom2
=
gpu
->
psRandom2
->
_pDevStream
[
0
];
gpu
->
sim
.
pRandomPosition
=
gpu
->
psRandomPosition
->
_pDevStream
[
0
];
gpu
->
sim
.
pRandomSeed
=
gpu
->
psRandomSeed
->
_pDevStream
[
0
];
...
...
@@ -1884,14 +1877,6 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
natoms
=
numAtoms
;
gpuAllocateInitialBuffers
(
gpu
);
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
(
*
gpu
->
psxVector4
)[
i
].
x
=
0.0
f
;
(
*
gpu
->
psxVector4
)[
i
].
y
=
0.0
f
;
(
*
gpu
->
psxVector4
)[
i
].
z
=
0.0
f
;
(
*
gpu
->
psxVector4
)[
i
].
w
=
0.0
f
;
}
gpu
->
psxVector4
->
Upload
();
gpu
->
iterations
=
0
;
gpu
->
sim
.
update_threads_per_block
=
(
gpu
->
natoms
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
...
...
@@ -2023,60 +2008,12 @@ void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT
gpu
->
sim
.
tau
=
tau
;
gpu
->
sim
.
T
=
temperature
;
gpu
->
sim
.
kT
=
BOLTZ
*
gpu
->
sim
.
T
;
double
GDT
=
gpu
->
sim
.
deltaT
/
gpu
->
sim
.
tau
;
double
EPH
=
exp
(
0.5
*
GDT
);
double
EMH
=
exp
(
-
0.5
*
GDT
);
double
EP
=
exp
(
GDT
);
double
EM
=
exp
(
-
GDT
);
double
B
,
C
,
D
;
if
(
GDT
>=
0.1
)
{
double
term1
=
EPH
-
1.0
;
term1
*=
term1
;
B
=
GDT
*
(
EP
-
1.0
)
-
4.0
*
term1
;
C
=
GDT
-
3.0
+
4.0
*
EMH
-
EM
;
D
=
2.0
-
EPH
-
EMH
;
}
else
{
double
term1
=
0.5
*
GDT
;
double
term2
=
term1
*
term1
;
double
term4
=
term2
*
term2
;
double
third
=
1.0
/
3.0
;
double
o7_9
=
7.0
/
9.0
;
double
o1_12
=
1.0
/
12.0
;
double
o17_90
=
17.0
/
90.0
;
double
o7_30
=
7.0
/
30.0
;
double
o31_1260
=
31.0
/
1260.0
;
double
o_360
=
1.0
/
360.0
;
B
=
term4
*
(
third
+
term1
*
(
third
+
term1
*
(
o17_90
+
term1
*
o7_9
)));
C
=
term2
*
term1
*
(
2.0
*
third
+
term1
*
(
-
0.5
+
term1
*
(
o7_30
+
term1
*
(
-
o1_12
+
term1
*
o31_1260
))));
D
=
term2
*
(
-
1.0
+
term2
*
(
-
o1_12
-
term2
*
o_360
));
}
double
DOverTauC
=
D
/
(
gpu
->
sim
.
tau
*
C
);
double
TauOneMinusEM
=
gpu
->
sim
.
tau
*
(
1.0
-
EM
);
double
TauDOverEMMinusOne
=
gpu
->
sim
.
tau
*
D
/
(
EM
-
1.0
);
double
fix1
=
gpu
->
sim
.
tau
*
(
EPH
-
EMH
);
if
(
fix1
==
0.0
)
fix1
=
deltaT
;
double
oneOverFix1
=
1.0
/
fix1
;
double
V
=
sqrt
(
gpu
->
sim
.
kT
*
(
1.0
-
EM
));
double
X
=
gpu
->
sim
.
tau
*
sqrt
(
gpu
->
sim
.
kT
*
C
);
double
Yv
=
sqrt
(
gpu
->
sim
.
kT
*
B
/
C
);
double
Yx
=
gpu
->
sim
.
tau
*
sqrt
(
gpu
->
sim
.
kT
*
B
/
(
1.0
-
EM
));
(
*
gpu
->
psLangevinParameters
)[
0
]
=
(
float
)
EM
;
(
*
gpu
->
psLangevinParameters
)[
1
]
=
(
float
)
EM
;
(
*
gpu
->
psLangevinParameters
)[
2
]
=
(
float
)
DOverTauC
;
(
*
gpu
->
psLangevinParameters
)[
3
]
=
(
float
)
TauOneMinusEM
;
(
*
gpu
->
psLangevinParameters
)[
4
]
=
(
float
)
TauDOverEMMinusOne
;
(
*
gpu
->
psLangevinParameters
)[
5
]
=
(
float
)
V
;
(
*
gpu
->
psLangevinParameters
)[
6
]
=
(
float
)
X
;
(
*
gpu
->
psLangevinParameters
)[
7
]
=
(
float
)
Yv
;
(
*
gpu
->
psLangevinParameters
)[
8
]
=
(
float
)
Yx
;
(
*
gpu
->
psLangevinParameters
)[
9
]
=
(
float
)
fix1
;
(
*
gpu
->
psLangevinParameters
)[
10
]
=
(
float
)
oneOverFix1
;
double
vscale
=
exp
(
-
deltaT
/
tau
);
double
fscale
=
(
1
-
vscale
)
*
tau
;
double
noisescale
=
sqrt
(
2
*
gpu
->
sim
.
kT
/
tau
)
*
sqrt
(
0.5
*
(
1
-
vscale
*
vscale
)
*
tau
);
(
*
gpu
->
psLangevinParameters
)[
0
]
=
(
float
)
vscale
;
(
*
gpu
->
psLangevinParameters
)[
1
]
=
(
float
)
fscale
;
(
*
gpu
->
psLangevinParameters
)[
2
]
=
(
float
)
noisescale
;
gpu
->
psLangevinParameters
->
Upload
();
gpu
->
psStepSize
->
Download
();
(
*
gpu
->
psStepSize
)[
0
].
y
=
deltaT
;
...
...
@@ -2129,8 +2066,6 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psVelm4
;
delete
gpu
->
psForce4
;
delete
gpu
->
psEnergy
;
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
if
(
gpu
->
psCustomParams
!=
NULL
)
delete
gpu
->
psCustomParams
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
8725cede
...
...
@@ -101,8 +101,6 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psVelm4
;
CUDAStream
<
float4
>*
psForce4
;
CUDAStream
<
float
>*
psEnergy
;
// Energy output buffer
CUDAStream
<
float4
>*
psxVector4
;
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float4
>*
psCustomParams
;
// Atom parameters for custom nonbonded force
CUDAStream
<
int4
>*
psCustomBondID
;
// Atom indices for custom bonds
...
...
platforms/cuda/src/kernels/kBrownianUpdate.cu
View file @
8725cede
...
...
@@ -51,14 +51,14 @@ void GetBrownianUpdateSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kBrownianUpdatePart1_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
...
...
@@ -67,7 +67,7 @@ void kBrownianUpdatePart1_kernel()
while
(
pos
<
cSim
.
atoms
)
{
float4
random4a
=
cSim
.
pRandom4
a
[
rpos
+
pos
];
float4
random4a
=
cSim
.
pRandom4
[
rpos
+
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float
invMass
=
cSim
.
pVelm4
[
pos
].
w
;
...
...
@@ -90,15 +90,15 @@ void kBrownianUpdatePart1(gpuContext gpu)
LAUNCHERROR
(
"kBrownianUpdatePart1"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kBrownianUpdatePart2_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kBrownianUpdatePart2_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
...
...
platforms/cuda/src/kernels/kCalculateAndersenThermostat.cu
View file @
8725cede
...
...
@@ -62,7 +62,7 @@ __global__ void kCalculateAndersenThermostat_kernel()
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
random4a
=
cSim
.
pRandom4
a
[
rpos
+
pos
];
float4
random4a
=
cSim
.
pRandom4
[
rpos
+
pos
];
float
scale
=
(
random4a
.
w
>
-
randomRange
&&
random4a
.
w
<
randomRange
?
0.0
f
:
1.0
f
);
float
add
=
(
1.0
f
-
scale
)
*
sqrt
(
cSim
.
kT
*
velocity
.
w
);
velocity
.
x
=
scale
*
velocity
.
x
+
add
*
random4a
.
x
;
...
...
platforms/cuda/src/kernels/kLangevinUpdate.cu
View file @
8725cede
...
...
@@ -34,7 +34,7 @@ using namespace std;
#include "gputypes.h"
enum
{
EM
,
EM_V
,
DOverTauC
,
TauOneMinusEM_V
,
TauDOverEMMinusOne
,
V
,
X
,
Yv
,
Yx
,
Fix1
,
OneOverFix1
,
MaxParams
};
enum
{
VelScale
,
ForceScale
,
NoiseScale
,
MaxParams
};
static
__constant__
cudaGmxSimulation
cSim
;
...
...
@@ -101,15 +101,15 @@ void kLangevinUpdatePart2(gpuContext gpu)
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kSelectLangevinStepSize_kernel
(
float
maxStepSize
)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kSelectLangevinStepSize_kernel
(
float
maxStepSize
)
{
// Calculate the error.
...
...
@@ -151,53 +151,12 @@ void kSelectLangevinStepSize_kernel(float maxStepSize)
// Recalculate the integration parameters.
float
gdt
=
newStepSize
/
cSim
.
tau
;
float
eph
=
exp
(
0.5
f
*
gdt
);
float
emh
=
exp
(
-
0.5
f
*
gdt
);
float
ep
=
exp
(
gdt
);
float
em
=
exp
(
-
gdt
);
float
em_v
=
exp
(
-
0.5
f
*
(
oldStepSize
+
newStepSize
)
/
cSim
.
tau
);
float
b
,
c
,
d
;
if
(
gdt
>=
0.1
f
)
{
float
term1
=
eph
-
1.0
f
;
term1
*=
term1
;
b
=
gdt
*
(
ep
-
1.0
f
)
-
4.0
f
*
term1
;
c
=
gdt
-
3.0
f
+
4.0
f
*
emh
-
em
;
d
=
2.0
f
-
eph
-
emh
;
}
else
{
float
term1
=
0.5
f
*
gdt
;
float
term2
=
term1
*
term1
;
float
term4
=
term2
*
term2
;
float
third
=
1.0
f
/
3.0
f
;
float
o7_9
=
7.0
f
/
9.0
f
;
float
o1_12
=
1.0
f
/
12.0
f
;
float
o17_90
=
17.0
f
/
90.0
f
;
float
o7_30
=
7.0
f
/
30.0
f
;
float
o31_1260
=
31.0
f
/
1260.0
f
;
float
o_360
=
1.0
f
/
360.0
f
;
b
=
term4
*
(
third
+
term1
*
(
third
+
term1
*
(
o17_90
+
term1
*
o7_9
)));
c
=
term2
*
term1
*
(
2.0
f
*
third
+
term1
*
(
-
0.5
f
+
term1
*
(
o7_30
+
term1
*
(
-
o1_12
+
term1
*
o31_1260
))));
d
=
term2
*
(
-
1.0
f
+
term2
*
(
-
o1_12
-
term2
*
o_360
));
}
float
fix1
=
cSim
.
tau
*
(
eph
-
emh
);
if
(
fix1
==
0.0
f
)
fix1
=
newStepSize
;
params
[
EM
]
=
em
;
params
[
EM_V
]
=
em_v
;
params
[
DOverTauC
]
=
d
/
(
cSim
.
tau
*
c
);
params
[
TauOneMinusEM_V
]
=
cSim
.
tau
*
(
1.0
f
-
em_v
);
params
[
TauDOverEMMinusOne
]
=
cSim
.
tau
*
d
/
(
em
-
1.0
f
);
params
[
Fix1
]
=
fix1
;
params
[
OneOverFix1
]
=
1.0
f
/
fix1
;
params
[
V
]
=
sqrt
(
cSim
.
kT
*
(
1.0
f
-
em
));
params
[
X
]
=
cSim
.
tau
*
sqrt
(
cSim
.
kT
*
c
);
params
[
Yv
]
=
sqrt
(
cSim
.
kT
*
b
/
c
);
params
[
Yx
]
=
cSim
.
tau
*
sqrt
(
cSim
.
kT
*
b
/
(
1.0
f
-
em
));
float
vscale
=
exp
(
-
newStepSize
/
cSim
.
tau
);
float
fscale
=
(
1
-
vscale
)
*
cSim
.
tau
;
float
noisescale
=
sqrt
(
2
*
cSim
.
kT
/
cSim
.
tau
)
*
sqrt
(
0.5
f
*
(
1
-
vscale
*
vscale
)
*
cSim
.
tau
);
params
[
VelScale
]
=
vscale
;
params
[
ForceScale
]
=
fscale
;
params
[
NoiseScale
]
=
noisescale
;
}
__syncthreads
();
if
(
threadIdx
.
x
<
MaxParams
)
...
...
platforms/cuda/src/kernels/kLangevinUpdate.h
View file @
8725cede
...
...
@@ -24,6 +24,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudatypes.h"
/**
* This file contains the kernels for Langevin integration. It is included
...
...
@@ -45,12 +46,12 @@ void kLangevinUpdatePart1CM_kernel()
void
kLangevinUpdatePart1_kernel
()
#endif
{
__shared__
volatile
float
params
[
MaxParams
];
if
(
threadIdx
.
x
<
MaxParams
)
params
[
threadIdx
.
x
]
=
cSim
.
pLangevinParameters
[
threadIdx
.
x
];
__syncthreads
();
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
float
vscale
=
cSim
.
pLangevinParameters
[
VelScale
];
float
fscale
=
cSim
.
pLangevinParameters
[
ForceScale
];
float
noisescale
=
cSim
.
pLangevinParameters
[
NoiseScale
];
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
...
...
@@ -91,45 +92,19 @@ void kLangevinUpdatePart1_kernel()
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xVector
=
cSim
.
pxVector4
[
pos
];
float4
random4a
=
cSim
.
pRandom4a
[
rpos
+
pos
];
float2
random2a
=
cSim
.
pRandom2a
[
rpos
+
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
random4a
=
cSim
.
pRandom4
[
rpos
+
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float3
Vmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
Vmh
.
x
=
xVector
.
x
*
params
[
DOverTauC
]
+
sqrtInvMass
*
params
[
Yv
]
*
random4a
.
x
;
Vmh
.
y
=
xVector
.
y
*
params
[
DOverTauC
]
+
sqrtInvMass
*
params
[
Yv
]
*
random4a
.
y
;
Vmh
.
z
=
xVector
.
z
*
params
[
DOverTauC
]
+
sqrtInvMass
*
params
[
Yv
]
*
random4a
.
z
;
float4
vVector
;
vVector
.
x
=
sqrtInvMass
*
params
[
V
]
*
random4a
.
w
;
vVector
.
y
=
sqrtInvMass
*
params
[
V
]
*
random2a
.
x
;
vVector
.
z
=
sqrtInvMass
*
params
[
V
]
*
random2a
.
y
;
vVector
.
w
=
0
.
0
f
;
cSim
.
pvVector4
[
pos
]
=
vVector
;
velocity
.
x
=
velocity
.
x
*
params
[
EM_V
]
+
velocity
.
w
*
force
.
x
*
params
[
TauOneMinusEM_V
]
+
vVector
.
x
-
params
[
EM
]
*
Vmh
.
x
;
velocity
.
y
=
velocity
.
y
*
params
[
EM_V
]
+
velocity
.
w
*
force
.
y
*
params
[
TauOneMinusEM_V
]
+
vVector
.
y
-
params
[
EM
]
*
Vmh
.
y
;
velocity
.
z
=
velocity
.
z
*
params
[
EM_V
]
+
velocity
.
w
*
force
.
z
*
params
[
TauOneMinusEM_V
]
+
vVector
.
z
-
params
[
EM
]
*
Vmh
.
z
;
velocity
.
x
=
vscale
*
velocity
.
x
+
fscale
*
velocity
.
w
*
force
.
x
+
noisescale
*
sqrtInvMass
*
random4a
.
x
;
velocity
.
y
=
vscale
*
velocity
.
y
+
fscale
*
velocity
.
w
*
force
.
y
+
noisescale
*
sqrtInvMass
*
random4a
.
y
;
velocity
.
z
=
vscale
*
velocity
.
z
+
fscale
*
velocity
.
w
*
force
.
z
+
noisescale
*
sqrtInvMass
*
random4a
.
z
;
#ifdef REMOVE_CM
velocity
.
x
-=
sCM
[
0
].
x
;
velocity
.
y
-=
sCM
[
0
].
y
;
velocity
.
z
-=
sCM
[
0
].
z
;
#endif
cSim
.
pOldPosq
[
pos
]
=
apos
;
apos
.
x
=
velocity
.
x
*
params
[
Fix1
];
apos
.
y
=
velocity
.
y
*
params
[
Fix1
];
apos
.
z
=
velocity
.
z
*
params
[
Fix1
];
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pOldPosq
[
pos
]
=
cSim
.
pPosq
[
pos
];
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
...
...
@@ -149,12 +124,9 @@ void kLangevinUpdatePart2CM_kernel()
void
kLangevinUpdatePart2_kernel
()
#endif
{
__shared__
float
params
[
MaxParams
];
if
(
threadIdx
.
x
<
MaxParams
)
params
[
threadIdx
.
x
]
=
cSim
.
pLangevinParameters
[
threadIdx
.
x
];
__syncthreads
();
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
float
dt
=
cSim
.
pStepSize
[
0
].
y
;
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
...
...
@@ -164,16 +136,6 @@ void kLangevinUpdatePart2_kernel()
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
vVector
=
cSim
.
pvVector4
[
pos
];
float4
xVector
;
float4
random4b
=
cSim
.
pRandom4b
[
rpos
+
pos
];
float2
random2b
=
cSim
.
pRandom2b
[
rpos
+
pos
];
float3
Xmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
velocity
.
x
=
xPrime
.
x
*
params
[
OneOverFix1
];
velocity
.
y
=
xPrime
.
y
*
params
[
OneOverFix1
];
velocity
.
z
=
xPrime
.
z
*
params
[
OneOverFix1
];
#ifdef REMOVE_CM
float
mass
=
1
.
0
f
/
velocity
.
w
;
CM
.
x
+=
mass
*
velocity
.
x
;
...
...
@@ -181,24 +143,9 @@ void kLangevinUpdatePart2_kernel()
CM
.
z
+=
mass
*
velocity
.
z
;
#endif
Xmh
.
x
=
vVector
.
x
*
params
[
TauDOverEMMinusOne
]
+
sqrtInvMass
*
params
[
Yx
]
*
random4b
.
x
;
Xmh
.
y
=
vVector
.
y
*
params
[
TauDOverEMMinusOne
]
+
sqrtInvMass
*
params
[
Yx
]
*
random4b
.
y
;
Xmh
.
z
=
vVector
.
z
*
params
[
TauDOverEMMinusOne
]
+
sqrtInvMass
*
params
[
Yx
]
*
random4b
.
z
;
xVector
.
x
=
sqrtInvMass
*
params
[
X
]
*
random4b
.
w
;
xVector
.
y
=
sqrtInvMass
*
params
[
X
]
*
random2b
.
x
;
xVector
.
z
=
sqrtInvMass
*
params
[
X
]
*
random2b
.
y
;
xPrime
.
x
+=
xVector
.
x
-
Xmh
.
x
;
xPrime
.
y
+=
xVector
.
y
-
Xmh
.
y
;
xPrime
.
z
+=
xVector
.
z
-
Xmh
.
z
;
float4
xPrime
=
make_float4
(
dt
*
velocity
.
x
,
dt
*
velocity
.
y
,
dt
*
velocity
.
z
,
0
);
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pxVector4
[
pos
]
=
xVector
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
...
...
platforms/cuda/src/kernels/kRandom.cu
View file @
8725cede
...
...
@@ -54,14 +54,14 @@ void GetRandomSim(gpuContext gpu)
extern
__shared__
float3
sRand
[];
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_RANDOM_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_RANDOM_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_RANDOM_THREADS_PER_BLOCK
,
1
)
#endif
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_RANDOM_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_RANDOM_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_RANDOM_THREADS_PER_BLOCK
,
1
)
#endif
void
kGenerateRandoms_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
@@ -73,7 +73,7 @@ void kGenerateRandoms_kernel()
float4
random4
;
float2
random2
;
while
(
pos
<
cSim
.
totalRandoms
TimesTwo
)
while
(
pos
<
cSim
.
totalRandoms
)
{
// Generate 6 randoms in GRF
...
...
@@ -156,10 +156,10 @@ void kGenerateRandoms_kernel()
random4
.
y
=
sRand
[
threadIdx
.
x
].
y
;
random4
.
z
=
sRand
[
threadIdx
.
x
].
z
;
random4
.
w
=
sRand
[
threadIdx
.
x
+
blockDim
.
x
].
x
;
cSim
.
pRandom4
a
[
pos
]
=
random4
;
cSim
.
pRandom4
[
pos
]
=
random4
;
random2
.
x
=
sRand
[
threadIdx
.
x
+
blockDim
.
x
].
y
;
random2
.
y
=
sRand
[
threadIdx
.
x
+
blockDim
.
x
].
z
;
cSim
.
pRandom2
a
[
pos
]
=
random2
;
cSim
.
pRandom2
[
pos
]
=
random2
;
pos
+=
increment
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment