Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3749e84b
Commit
3749e84b
authored
Jun 24, 2009
by
Peter Eastman
Browse files
Various cleanups to integration code
parent
1ef6e1d8
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
946 additions
and
220 deletions
+946
-220
openmmapi/include/openmm/VerletIntegrator.h
openmmapi/include/openmm/VerletIntegrator.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+2
-2
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+6
-4
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+2
-1
platforms/cuda/src/kernels/kBrownianUpdate.cu
platforms/cuda/src/kernels/kBrownianUpdate.cu
+1
-14
platforms/cuda/src/kernels/kLangevinUpdate.cu
platforms/cuda/src/kernels/kLangevinUpdate.cu
+100
-0
platforms/cuda/src/kernels/kLangevinUpdate.h
platforms/cuda/src/kernels/kLangevinUpdate.h
+224
-0
platforms/cuda/src/kernels/kSettle.cu
platforms/cuda/src/kernels/kSettle.cu
+0
-2
platforms/cuda/src/kernels/kShakeH.cu
platforms/cuda/src/kernels/kShakeH.cu
+432
-0
platforms/cuda/src/kernels/kVerletUpdate.cu
platforms/cuda/src/kernels/kVerletUpdate.cu
+4
-196
platforms/cuda/src/kernels/kVerletUpdate.h
platforms/cuda/src/kernels/kVerletUpdate.h
+174
-0
No files found.
openmmapi/include/openmm/VerletIntegrator.h
View file @
3749e84b
...
...
@@ -39,7 +39,7 @@
namespace
OpenMM
{
/**
* This is an Integrator which simulates a System using the
velocity
Verlet algorithm.
* This is an Integrator which simulates a System using the
leap-frog
Verlet algorithm.
*/
class
OPENMM_EXPORT
VerletIntegrator
:
public
Integrator
{
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
3749e84b
...
...
@@ -488,7 +488,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
prevFriction
=
friction
;
prevStepSize
=
stepSize
;
}
kUpdatePart1
(
gpu
);
k
Langevin
UpdatePart1
(
gpu
);
kApplyFirstShake
(
gpu
);
kApplyFirstSettle
(
gpu
);
kApplyFirstCCMA
(
gpu
);
...
...
@@ -497,7 +497,7 @@ void CudaIntegrateLangevinStepKernel::execute(OpenMMContextImpl& context, const
if
(
step
%
data
.
cmMotionFrequency
==
0
)
gpu
->
bCalculateCM
=
true
;
}
kUpdatePart2
(
gpu
);
k
Langevin
UpdatePart2
(
gpu
);
kApplySecondShake
(
gpu
);
kApplySecondSettle
(
gpu
);
kApplySecondCCMA
(
gpu
);
...
...
platforms/cuda/src/kernels/cudaKernels.h
View file @
3749e84b
...
...
@@ -40,14 +40,14 @@ extern void kCalculateObcGbsaForces2(gpuContext gpu);
extern
void
kCalculateLocalForces
(
gpuContext
gpu
);
extern
void
kCalculateAndersenThermostat
(
gpuContext
gpu
);
extern
void
kReduceBornSumAndForces
(
gpuContext
gpu
);
extern
void
kUpdatePart1
(
gpuContext
gpu
);
extern
void
kApplyFirstShake
(
gpuContext
gpu
);
extern
void
kApplyFirstCCMA
(
gpuContext
gpu
);
extern
void
kApplyFirstSettle
(
gpuContext
gpu
);
extern
void
kUpdatePart2
(
gpuContext
gpu
);
extern
void
kApplySecondShake
(
gpuContext
gpu
);
extern
void
kApplySecondCCMA
(
gpuContext
gpu
);
extern
void
kApplySecondSettle
(
gpuContext
gpu
);
extern
void
kLangevinUpdatePart1
(
gpuContext
gpu
);
extern
void
kLangevinUpdatePart2
(
gpuContext
gpu
);
extern
void
kVerletUpdatePart1
(
gpuContext
gpu
);
extern
void
kVerletUpdatePart2
(
gpuContext
gpu
);
extern
void
kBrownianUpdatePart1
(
gpuContext
gpu
);
...
...
@@ -72,8 +72,10 @@ extern void SetCalculateAndersenThermostatSim(gpuContext gpu);
extern
void
GetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
SetForcesSim
(
gpuContext
gpu
);
extern
void
GetForcesSim
(
gpuContext
gpu
);
extern
void
SetUpdateShakeHSim
(
gpuContext
gpu
);
extern
void
GetUpdateShakeHSim
(
gpuContext
gpu
);
extern
void
SetShakeHSim
(
gpuContext
gpu
);
extern
void
GetShakeHSim
(
gpuContext
gpu
);
extern
void
SetLangevinUpdateSim
(
gpuContext
gpu
);
extern
void
GetLangevinUpdateSim
(
gpuContext
gpu
);
extern
void
SetSettleSim
(
gpuContext
gpu
);
extern
void
GetSettleSim
(
gpuContext
gpu
);
extern
void
SetCCMASim
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
3749e84b
...
...
@@ -1757,7 +1757,8 @@ int gpuSetConstants(gpuContext gpu)
SetCalculateObcGbsaForces2Sim
(
gpu
);
SetCalculateAndersenThermostatSim
(
gpu
);
SetForcesSim
(
gpu
);
SetUpdateShakeHSim
(
gpu
);
SetShakeHSim
(
gpu
);
SetLangevinUpdateSim
(
gpu
);
SetVerletUpdateSim
(
gpu
);
SetBrownianUpdateSim
(
gpu
);
SetSettleSim
(
gpu
);
...
...
platforms/cuda/src/kernels/kBrownianUpdate.cu
View file @
3749e84b
...
...
@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#define DeltaShake
static
__constant__
cudaGmxSimulation
cSim
;
void
SetBrownianUpdateSim
(
gpuContext
gpu
)
...
...
@@ -66,15 +64,9 @@ __global__ void kBrownianUpdatePart1_kernel()
float4
force
=
cSim
.
pForce4
[
pos
];
cSim
.
pOldPosq
[
pos
]
=
apos
;
#ifndef DeltaShake
apos
.
x
+=
force
.
x
*
cSim
.
GDT
+
random4a
.
x
;
apos
.
y
+=
force
.
y
*
cSim
.
GDT
+
random4a
.
y
;
apos
.
z
+=
force
.
z
*
cSim
.
GDT
+
random4a
.
z
;
#else
apos
.
x
=
force
.
x
*
cSim
.
GDT
+
random4a
.
x
;
apos
.
y
=
force
.
y
*
cSim
.
GDT
+
random4a
.
y
;
apos
.
z
=
force
.
z
*
cSim
.
GDT
+
random4a
.
z
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
...
...
@@ -99,11 +91,6 @@ __global__ void kBrownianUpdatePart2_kernel()
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
#ifndef DeltaShake
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
-
apos
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
-
apos
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
-
apos
.
z
);
#else
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
...
...
@@ -111,7 +98,7 @@ __global__ void kBrownianUpdatePart2_kernel()
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
#endif
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
...
...
platforms/cuda/src/kernels/kLangevinUpdate.cu
0 → 100755
View file @
3749e84b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
using
namespace
std
;
#include "gputypes.h"
static
__constant__
cudaGmxSimulation
cSim
;
void
SetLangevinUpdateSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetLangevinUpdateSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
// Include versions of the kernels with and with center of mass motion removal.
#include "kLangevinUpdate.h"
#define REMOVE_CM
#include "kLangevinUpdate.h"
void
kLangevinUpdatePart1
(
gpuContext
gpu
)
{
// printf("kLangevinUpdatePart1\n");
if
(
gpu
->
bRemoveCM
)
{
kLangevinUpdatePart1CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart1CM"
);
gpu
->
bRemoveCM
=
false
;
}
else
{
kLangevinUpdatePart1_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart1"
);
}
}
extern
void
kGenerateRandoms
(
gpuContext
gpu
);
void
kLangevinUpdatePart2
(
gpuContext
gpu
)
{
// printf("kLangevinUpdatePart2\n");
if
(
gpu
->
bCalculateCM
)
{
kLangevinUpdatePart2CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart2CM"
);
gpu
->
bCalculateCM
=
false
;
gpu
->
bRemoveCM
=
true
;
}
else
{
kLangevinUpdatePart2_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kLangevinUpdatePart2"
);
}
// Update randoms if necessary
gpu
->
iterations
++
;
if
(
gpu
->
iterations
==
gpu
->
sim
.
randomIterations
)
{
kGenerateRandoms
(
gpu
);
gpu
->
iterations
=
0
;
}
}
platforms/cuda/src/kernels/kLangevinUpdate.h
0 → 100644
View file @
3749e84b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Langevin integration. It is included
* several times in kLangevinUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__
void
kLangevinUpdatePart1CM_kernel
()
#else
__global__
void
kLangevinUpdatePart1_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
float4
CM1
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
// Read CM outputs from previous step
unsigned
int
cpos
=
threadIdx
.
x
;
while
(
cpos
<
gridDim
.
x
)
{
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
CM
.
z
+=
CM1
.
z
;
cpos
+=
blockDim
.
x
;
}
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
__syncthreads
();
// Reduce CM
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xVector
=
cSim
.
pxVector4
[
pos
];
float4
random4a
=
cSim
.
pRandom4a
[
rpos
+
pos
];
float2
random2a
=
cSim
.
pRandom2a
[
rpos
+
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float3
Vmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
Vmh
.
x
=
xVector
.
x
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
x
;
Vmh
.
y
=
xVector
.
y
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
y
;
Vmh
.
z
=
xVector
.
z
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
z
;
float4
vVector
;
vVector
.
x
=
sqrtInvMass
*
random4a
.
w
;
vVector
.
y
=
sqrtInvMass
*
random2a
.
x
;
vVector
.
z
=
sqrtInvMass
*
random2a
.
y
;
vVector
.
w
=
0
.
0
f
;
cSim
.
pvVector4
[
pos
]
=
vVector
;
velocity
.
x
=
velocity
.
x
*
cSim
.
EM
+
velocity
.
w
*
force
.
x
*
cSim
.
TauOneMinusEM
+
vVector
.
x
-
cSim
.
EM
*
Vmh
.
x
;
velocity
.
y
=
velocity
.
y
*
cSim
.
EM
+
velocity
.
w
*
force
.
y
*
cSim
.
TauOneMinusEM
+
vVector
.
y
-
cSim
.
EM
*
Vmh
.
y
;
velocity
.
z
=
velocity
.
z
*
cSim
.
EM
+
velocity
.
w
*
force
.
z
*
cSim
.
TauOneMinusEM
+
vVector
.
z
-
cSim
.
EM
*
Vmh
.
z
;
#ifdef REMOVE_CM
velocity
.
x
-=
sCM
[
0
].
x
;
velocity
.
y
-=
sCM
[
0
].
y
;
velocity
.
z
-=
sCM
[
0
].
z
;
#endif
cSim
.
pOldPosq
[
pos
]
=
apos
;
apos
.
x
=
velocity
.
x
*
cSim
.
fix1
;
apos
.
y
=
velocity
.
y
*
cSim
.
fix1
;
apos
.
z
=
velocity
.
z
*
cSim
.
fix1
;
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
#ifdef REMOVE_CM
__global__
void
kLangevinUpdatePart2CM_kernel
()
#else
__global__
void
kLangevinUpdatePart2_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
__syncthreads
();
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
vVector
=
cSim
.
pvVector4
[
pos
];
float4
xVector
;
float4
random4b
=
cSim
.
pRandom4b
[
rpos
+
pos
];
float2
random2b
=
cSim
.
pRandom2b
[
rpos
+
pos
];
float3
Xmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
velocity
.
x
=
xPrime
.
x
*
cSim
.
oneOverFix1
;
velocity
.
y
=
xPrime
.
y
*
cSim
.
oneOverFix1
;
velocity
.
z
=
xPrime
.
z
*
cSim
.
oneOverFix1
;
#ifdef REMOVE_CM
float
mass
=
1
.
0
f
/
velocity
.
w
;
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
#endif;
Xmh
.
x
=
vVector
.
x
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
x
;
Xmh
.
y
=
vVector
.
y
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
y
;
Xmh
.
z
=
vVector
.
z
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
z
;
xVector
.
x
=
sqrtInvMass
*
random4b
.
w
;
xVector
.
y
=
sqrtInvMass
*
random2b
.
x
;
xVector
.
z
=
sqrtInvMass
*
random2b
.
y
;
xPrime
.
x
+=
xVector
.
x
-
Xmh
.
x
;
xPrime
.
y
+=
xVector
.
y
-
Xmh
.
y
;
xPrime
.
z
+=
xVector
.
z
-
Xmh
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pxVector4
[
pos
]
=
xVector
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Update random position pointer
if
(
threadIdx
.
x
==
0
)
{
rpos
+=
cSim
.
paddedNumberOfAtoms
;
if
(
rpos
>
cSim
.
randoms
)
rpos
-=
cSim
.
randoms
;
cSim
.
pRandomPosition
[
blockIdx
.
x
]
=
rpos
;
}
#ifdef REMOVE_CM
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0
.
0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
#endif
}
platforms/cuda/src/kernels/kSettle.cu
View file @
3749e84b
...
...
@@ -33,8 +33,6 @@
//#include <fstream>
using
namespace
std
;
#define DeltaShake
#include "gputypes.h"
...
...
platforms/cuda/src/kernels/k
Update
ShakeH.cu
→
platforms/cuda/src/kernels/kShakeH.cu
100755 → 100644
View file @
3749e84b
...
...
@@ -30,11 +30,8 @@
#include <cstdlib>
#include <string>
#include <iostream>
//#include <fstream>
using
namespace
std
;
#define DeltaShake
#include "gputypes.h"
struct
Atom
...
...
@@ -53,272 +50,20 @@ struct Atom
static
__constant__
cudaGmxSimulation
cSim
;
void
Set
Update
ShakeHSim
(
gpuContext
gpu
)
void
SetShakeHSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
Get
Update
ShakeHSim
(
gpuContext
gpu
)
void
GetShakeHSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
__global__
void
kUpdatePart1_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
__syncthreads
();
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xVector
=
cSim
.
pxVector4
[
pos
];
float4
random4a
=
cSim
.
pRandom4a
[
rpos
+
pos
];
float2
random2a
=
cSim
.
pRandom2a
[
rpos
+
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float3
Vmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
Vmh
.
x
=
xVector
.
x
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
x
;
Vmh
.
y
=
xVector
.
y
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
y
;
Vmh
.
z
=
xVector
.
z
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
z
;
float4
vVector
;
vVector
.
x
=
sqrtInvMass
*
random4a
.
w
;
vVector
.
y
=
sqrtInvMass
*
random2a
.
x
;
vVector
.
z
=
sqrtInvMass
*
random2a
.
y
;
vVector
.
w
=
0.0
f
;
cSim
.
pvVector4
[
pos
]
=
vVector
;
velocity
.
x
=
velocity
.
x
*
cSim
.
EM
+
velocity
.
w
*
force
.
x
*
cSim
.
TauOneMinusEM
+
vVector
.
x
-
cSim
.
EM
*
Vmh
.
x
;
velocity
.
y
=
velocity
.
y
*
cSim
.
EM
+
velocity
.
w
*
force
.
y
*
cSim
.
TauOneMinusEM
+
vVector
.
y
-
cSim
.
EM
*
Vmh
.
y
;
velocity
.
z
=
velocity
.
z
*
cSim
.
EM
+
velocity
.
w
*
force
.
z
*
cSim
.
TauOneMinusEM
+
vVector
.
z
-
cSim
.
EM
*
Vmh
.
z
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
#ifndef DeltaShake
apos
.
x
+=
velocity
.
x
*
cSim
.
fix1
;
apos
.
y
+=
velocity
.
y
*
cSim
.
fix1
;
apos
.
z
+=
velocity
.
z
*
cSim
.
fix1
;
#else
apos
.
x
=
velocity
.
x
*
cSim
.
fix1
;
apos
.
y
=
velocity
.
y
*
cSim
.
fix1
;
apos
.
z
=
velocity
.
z
*
cSim
.
fix1
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kUpdatePart1CM_kernel
()
{
extern
__shared__
float3
sCM
[];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
float4
CM1
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
// Read CM outputs from previous step
unsigned
int
cpos
=
threadIdx
.
x
;
#if 0
float4 CM2 = { 0.0f, 0.0f, 0.0f, 0.0f };
float4 CM3 = { 0.0f, 0.0f, 0.0f, 0.0f };
float4 CM4 = { 0.0f, 0.0f, 0.0f, 0.0f };
if (cpos < gridDim.x)
CM1 = cSim.pLinearMomentum[cpos];
cpos += gridDim.x;
if (cpos < gridDim.x)
CM2 = cSim.pLinearMomentum[cpos];
cpos += gridDim.x;
if (cpos < gridDim.x)
CM3 = cSim.pLinearMomentum[cpos];
cpos += gridDim.x;
if (cpos < gridDim.x)
CM4 = cSim.pLinearMomentum[cpos];
sCM[threadIdx.x].x = CM1.x + CM2.x + CM3.x + CM4.x;
sCM[threadIdx.x].y = CM1.y + CM2.y + CM3.y + CM4.y;
sCM[threadIdx.x].z = CM1.z + CM2.z + CM3.z + CM4.z;
#else
while
(
cpos
<
gridDim
.
x
)
{
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
CM
.
z
+=
CM1
.
z
;
cpos
+=
blockDim
.
x
;
}
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
#endif
__syncthreads
();
// Reduce CM
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
xVector
=
cSim
.
pxVector4
[
pos
];
float4
random4a
=
cSim
.
pRandom4a
[
rpos
+
pos
];
float2
random2a
=
cSim
.
pRandom2a
[
rpos
+
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float3
Vmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
Vmh
.
x
=
xVector
.
x
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
x
;
Vmh
.
y
=
xVector
.
y
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
y
;
Vmh
.
z
=
xVector
.
z
*
cSim
.
DOverTauC
+
sqrtInvMass
*
random4a
.
z
;
float4
vVector
;
vVector
.
x
=
sqrtInvMass
*
random4a
.
w
;
vVector
.
y
=
sqrtInvMass
*
random2a
.
x
;
vVector
.
z
=
sqrtInvMass
*
random2a
.
y
;
vVector
.
w
=
0.0
f
;
cSim
.
pvVector4
[
pos
]
=
vVector
;
velocity
.
x
=
velocity
.
x
*
cSim
.
EM
+
velocity
.
w
*
force
.
x
*
cSim
.
TauOneMinusEM
+
vVector
.
x
-
cSim
.
EM
*
Vmh
.
x
-
sCM
[
0
].
x
;
velocity
.
y
=
velocity
.
y
*
cSim
.
EM
+
velocity
.
w
*
force
.
y
*
cSim
.
TauOneMinusEM
+
vVector
.
y
-
cSim
.
EM
*
Vmh
.
y
-
sCM
[
0
].
y
;
velocity
.
z
=
velocity
.
z
*
cSim
.
EM
+
velocity
.
w
*
force
.
z
*
cSim
.
TauOneMinusEM
+
vVector
.
z
-
cSim
.
EM
*
Vmh
.
z
-
sCM
[
0
].
z
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
#ifndef DeltaShake
apos
.
x
+=
velocity
.
x
*
cSim
.
fix1
;
apos
.
y
+=
velocity
.
y
*
cSim
.
fix1
;
apos
.
z
+=
velocity
.
z
*
cSim
.
fix1
;
#else
apos
.
x
=
velocity
.
x
*
cSim
.
fix1
;
apos
.
y
=
velocity
.
y
*
cSim
.
fix1
;
apos
.
z
=
velocity
.
z
*
cSim
.
fix1
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kUpdatePart1
(
gpuContext
gpu
)
{
// printf("kUpdatePart1\n");
#if 0
static int iteration = 0;
if (iteration == 0)
{
gpu->psPosq4->Download();
gpu->psVelm4->Download();
printf("# %d atoms\n", gpu->natoms);
for (int i = 0; i < gpu->natoms; i++)
{
printf("%5d %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f\n", i,
gpu->psPosq4->_pSysStream[0][i].x, gpu->psPosq4->_pSysStream[0][i].y,
gpu->psPosq4->_pSysStream[0][i].z, gpu->psPosq4->_pSysStream[0][i].w,
gpu->psVelm4->_pSysStream[0][i].x, gpu->psVelm4->_pSysStream[0][i].y,
gpu->psVelm4->_pSysStream[0][i].z, gpu->psVelm4->_pSysStream[0][i].w
);
}
}
iteration++;
#endif
#if 0
static const float KILO = 1e3; // Thousand
static const float BOLTZMANN = 1.380658e-23f; // (J/K)
static const float AVOGADRO = 6.0221367e23f; // ()
static const float RGAS = BOLTZMANN * AVOGADRO; // (J/(mol K))
static const float BOLTZ = (RGAS / KILO); // (kJ/(mol K))
static int iteration = 0;
// Check T
if (iteration % 1000 == 0)
{
gpu->psVelm4->Download();
float ke = 0.0f;
for (int i = 0; i < gpu->natoms; i++)
{
float vx = gpu->psVelm4->_pSysStream[0][i].x;
float vy = gpu->psVelm4->_pSysStream[0][i].y;
float vz = gpu->psVelm4->_pSysStream[0][i].z;
float m = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
ke += m * (vx * vx + vy * vy + vz * vz);
}
float T = ke / (BOLTZ * gpu->sim.degreesOfFreedom);
printf("Iteration %d, Temperature is %f\n", iteration, T);
}
iteration++;
#endif
if
(
gpu
->
bRemoveCM
)
{
kUpdatePart1CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
LAUNCHERROR
(
"kUpdatePart1CM"
);
gpu
->
bRemoveCM
=
false
;
#if 0
gpu->psLinearMomentum->Download();
gpu->psVelm4->Download();
float3 mv = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->natoms; i++)
{
float mass = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
mv.x += mass * gpu->psVelm4->_pSysStream[0][i].x;
mv.y += mass * gpu->psVelm4->_pSysStream[0][i].y;
mv.z += mass * gpu->psVelm4->_pSysStream[0][i].z;
}
mv.x *= gpu->sim.inverseTotalMass;
mv.y *= gpu->sim.inverseTotalMass;
mv.z *= gpu->sim.inverseTotalMass;
float3 mv1 = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->sim.blocks; i++)
{
mv1.x += gpu->psLinearMomentum->_pSysStream[0][i].x;
mv1.y += gpu->psLinearMomentum->_pSysStream[0][i].y;
mv1.z += gpu->psLinearMomentum->_pSysStream[0][i].z;
}
printf("%11.5f %11.5f %11.5f | %11.5f %11.5f %11.5f\n", mv.x, mv.y, mv.z, mv1.x, mv1.y, mv1.z);
#endif
}
else
{
kUpdatePart1_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kUpdatePart1"
);
}
}
__global__
void
kApplyFirstShake_kernel
()
{
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
...
...
@@ -364,20 +109,6 @@ __global__ void kApplyFirstShake_kernel()
xj3
.
x
=
apos3
.
x
;
xj3
.
y
=
apos3
.
y
;
xj3
.
z
=
apos3
.
z
;
#ifndef DeltaShake
xpi
.
x
-=
xi
.
x
;
xpi
.
y
-=
xi
.
y
;
xpi
.
z
-=
xi
.
z
;
xpj1
.
x
-=
xj1
.
x
;
xpj1
.
y
-=
xj1
.
y
;
xpj1
.
z
-=
xj1
.
z
;
xpj2
.
x
-=
xj2
.
x
;
xpj2
.
y
-=
xj2
.
y
;
xpj2
.
z
-=
xj2
.
z
;
xpj3
.
x
-=
xj3
.
x
;
xpj3
.
y
-=
xj3
.
y
;
xpj3
.
z
-=
xj3
.
z
;
#endif
psA
->
rij1
.
x
=
xi
.
x
-
xj1
.
x
;
psA
->
rij1
.
y
=
xi
.
y
-
xj1
.
y
;
psA
->
rij1
.
z
=
xi
.
z
-
xj1
.
z
;
...
...
@@ -474,24 +205,6 @@ __global__ void kApplyFirstShake_kernel()
}
iteration
++
;
}
#ifndef DeltaShake
xpi
.
x
+=
xi
.
x
;
xpi
.
y
+=
xi
.
y
;
xpi
.
z
+=
xi
.
z
;
xpj1
.
x
+=
xj1
.
x
;
xpj1
.
y
+=
xj1
.
y
;
xpj1
.
z
+=
xj1
.
z
;
xpj2
.
x
+=
xj2
.
x
;
xpj2
.
y
+=
xj2
.
y
;
xpj2
.
z
+=
xj2
.
z
;
xpj3
.
x
+=
xj3
.
x
;
xpj3
.
y
+=
xj3
.
y
;
xpj3
.
z
+=
xj3
.
z
;
#endif
cSim
.
pPosqP
[
atomID
.
x
]
=
xpi
;
cSim
.
pPosqP
[
atomID
.
y
]
=
xpj1
;
if
(
atomID
.
z
!=
-
1
)
...
...
@@ -513,217 +226,6 @@ void kApplyFirstShake(gpuContext gpu)
}
}
__global__
void
kUpdatePart2_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
__syncthreads
();
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
#ifndef DeltaShake
float4
apos
=
cSim
.
pPosq
[
pos
];
#endif
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
vVector
=
cSim
.
pvVector4
[
pos
];
float4
xVector
;
float4
random4b
=
cSim
.
pRandom4b
[
rpos
+
pos
];
float2
random2b
=
cSim
.
pRandom2b
[
rpos
+
pos
];
float3
Xmh
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
#ifdef DeltaShake
velocity
.
x
=
xPrime
.
x
*
cSim
.
oneOverFix1
;
velocity
.
y
=
xPrime
.
y
*
cSim
.
oneOverFix1
;
velocity
.
z
=
xPrime
.
z
*
cSim
.
oneOverFix1
;
#else
velocity
.
x
=
(
xPrime
.
x
-
apos
.
x
)
*
cSim
.
oneOverFix1
;
velocity
.
y
=
(
xPrime
.
y
-
apos
.
y
)
*
cSim
.
oneOverFix1
;
velocity
.
z
=
(
xPrime
.
z
-
apos
.
z
)
*
cSim
.
oneOverFix1
;
#endif
Xmh
.
x
=
vVector
.
x
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
x
;
Xmh
.
y
=
vVector
.
y
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
y
;
Xmh
.
z
=
vVector
.
z
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
z
;
xVector
.
x
=
sqrtInvMass
*
random4b
.
w
;
xVector
.
y
=
sqrtInvMass
*
random2b
.
x
;
xVector
.
z
=
sqrtInvMass
*
random2b
.
y
;
xPrime
.
x
+=
xVector
.
x
-
Xmh
.
x
;
xPrime
.
y
+=
xVector
.
y
-
Xmh
.
y
;
xPrime
.
z
+=
xVector
.
z
-
Xmh
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pxVector4
[
pos
]
=
xVector
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Update random position pointer
if
(
threadIdx
.
x
==
0
)
{
rpos
+=
cSim
.
paddedNumberOfAtoms
;
if
(
rpos
>
cSim
.
randoms
)
rpos
-=
cSim
.
randoms
;
cSim
.
pRandomPosition
[
blockIdx
.
x
]
=
rpos
;
}
}
__global__
void
kUpdatePart2CM_kernel
()
{
extern
__shared__
float3
sCM
[];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
rpos
=
cSim
.
pRandomPosition
[
blockIdx
.
x
];
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
__syncthreads
();
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
#ifndef DeltaShake
float4
apos
=
cSim
.
pPosq
[
pos
];
#endif
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float4
vVector
=
cSim
.
pvVector4
[
pos
];
float4
xVector
;
float4
random4b
=
cSim
.
pRandom4b
[
rpos
+
pos
];
float2
random2b
=
cSim
.
pRandom2b
[
rpos
+
pos
];
float3
Xmh
;
float
mass
=
1.0
f
/
velocity
.
w
;
float
sqrtInvMass
=
sqrt
(
velocity
.
w
);
#ifdef DeltaShake
velocity
.
x
=
xPrime
.
x
*
cSim
.
oneOverFix1
;
velocity
.
y
=
xPrime
.
y
*
cSim
.
oneOverFix1
;
velocity
.
z
=
xPrime
.
z
*
cSim
.
oneOverFix1
;
#else
velocity
.
x
=
(
xPrime
.
x
-
apos
.
x
)
*
cSim
.
oneOverFix1
;
velocity
.
y
=
(
xPrime
.
y
-
apos
.
y
)
*
cSim
.
oneOverFix1
;
velocity
.
z
=
(
xPrime
.
z
-
apos
.
z
)
*
cSim
.
oneOverFix1
;
#endif
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
Xmh
.
x
=
vVector
.
x
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
x
;
Xmh
.
y
=
vVector
.
y
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
y
;
Xmh
.
z
=
vVector
.
z
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
z
;
xVector
.
x
=
sqrtInvMass
*
random4b
.
w
;
xVector
.
y
=
sqrtInvMass
*
random2b
.
x
;
xVector
.
z
=
sqrtInvMass
*
random2b
.
y
;
xPrime
.
x
+=
xVector
.
x
-
Xmh
.
x
;
xPrime
.
y
+=
xVector
.
y
-
Xmh
.
y
;
xPrime
.
z
+=
xVector
.
z
-
Xmh
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
cSim
.
pxVector4
[
pos
]
=
xVector
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Update random position pointer
if
(
threadIdx
.
x
==
0
)
{
rpos
+=
cSim
.
paddedNumberOfAtoms
;
if
(
rpos
>
cSim
.
randoms
)
rpos
-=
cSim
.
randoms
;
cSim
.
pRandomPosition
[
blockIdx
.
x
]
=
rpos
;
}
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0.0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
}
extern
void
kGenerateRandoms
(
gpuContext
gpu
);
void
kUpdatePart2
(
gpuContext
gpu
)
{
// printf("kUpdatePart2\n");
if
(
gpu
->
bCalculateCM
)
{
kUpdatePart2CM_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
,
gpu
->
sim
.
update_threads_per_block
*
sizeof
(
float3
)
>>>
();
LAUNCHERROR
(
"kUpdatePart2CM"
);
gpu
->
bCalculateCM
=
false
;
gpu
->
bRemoveCM
=
true
;
#if 0
gpu->psLinearMomentum->Download();
gpu->psVelm4->Download();
float3 mv = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->natoms; i++)
{
float mass = 1.0f / gpu->psVelm4->_pSysStream[0][i].w;
mv.x += mass * gpu->psVelm4->_pSysStream[0][i].x;
mv.y += mass * gpu->psVelm4->_pSysStream[0][i].y;
mv.z += mass * gpu->psVelm4->_pSysStream[0][i].z;
}
mv.x *= gpu->sim.inverseTotalMass;
mv.y *= gpu->sim.inverseTotalMass;
mv.z *= gpu->sim.inverseTotalMass;
float3 mv1 = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < gpu->sim.blocks; i++)
{
mv1.x += gpu->psLinearMomentum->_pSysStream[0][i].x;
mv1.y += gpu->psLinearMomentum->_pSysStream[0][i].y;
mv1.z += gpu->psLinearMomentum->_pSysStream[0][i].z;
}
printf("%11.5f %11.5f %11.5f | %11.5f %11.5f %11.5f\n", mv.x, mv.y, mv.z, mv1.x, mv1.y, mv1.z);
#endif
}
else
{
kUpdatePart2_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kUpdatePart2"
);
}
// Update randoms if necessary
gpu
->
iterations
++
;
if
(
gpu
->
iterations
==
gpu
->
sim
.
randomIterations
)
{
kGenerateRandoms
(
gpu
);
gpu
->
iterations
=
0
;
}
}
__global__
void
kApplySecondShake_kernel
()
{
__shared__
Atom
sA
[
G8X_THREADS_PER_BLOCK
];
...
...
@@ -769,20 +271,6 @@ __global__ void kApplySecondShake_kernel()
xj3
.
x
=
apos3
.
x
;
xj3
.
y
=
apos3
.
y
;
xj3
.
z
=
apos3
.
z
;
#ifndef DeltaShake
xpi
.
x
-=
xi
.
x
;
xpi
.
y
-=
xi
.
y
;
xpi
.
z
-=
xi
.
z
;
xpj1
.
x
-=
xj1
.
x
;
xpj1
.
y
-=
xj1
.
y
;
xpj1
.
z
-=
xj1
.
z
;
xpj2
.
x
-=
xj2
.
x
;
xpj2
.
y
-=
xj2
.
y
;
xpj2
.
z
-=
xj2
.
z
;
xpj3
.
x
-=
xj3
.
x
;
xpj3
.
y
-=
xj3
.
y
;
xpj3
.
z
-=
xj3
.
z
;
#endif
psA
->
rij1
.
x
=
xi
.
x
-
xj1
.
x
;
psA
->
rij1
.
y
=
xi
.
y
-
xj1
.
y
;
psA
->
rij1
.
z
=
xi
.
z
-
xj1
.
z
;
...
...
@@ -923,15 +411,10 @@ __global__ void kApplyNoShake_kernel()
}
}
void
kCPUShake2
(
gpuContext
gpu
)
{
}
void
kApplySecondShake
(
gpuContext
gpu
)
{
// printf("kApplySecondShake\n");
// kCPUShake2(gpu);
if
(
gpu
->
sim
.
ShakeConstraints
>
0
)
{
kApplySecondShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
shake_threads_per_block
>>>
();
...
...
@@ -940,14 +423,10 @@ void kApplySecondShake(gpuContext gpu)
// handle non-Shake atoms
#ifdef DeltaShake
if
(
gpu
->
sim
.
NonShakeConstraints
>
0
)
{
//fprintf( gpu->log, "kApplyNoShake_kernel %d %d \n", gpu->sim.blocks, gpu->sim.nonshake_threads_per_block); fflush( gpu->log );
kApplyNoShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonshake_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplyNoShake"
);
}
#endif
}
platforms/cuda/src/kernels/kVerletUpdate.cu
View file @
3749e84b
...
...
@@ -35,8 +35,6 @@ using namespace std;
#include "gputypes.h"
#define DeltaShake
static
__constant__
cudaGmxSimulation
cSim
;
void
SetVerletUpdateSim
(
gpuContext
gpu
)
...
...
@@ -53,102 +51,11 @@ void GetVerletUpdateSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
__global__
void
kVerletUpdatePart1_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float
dtOverMass
=
cSim
.
deltaT
*
velocity
.
w
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
velocity
.
x
+=
dtOverMass
*
force
.
x
;
velocity
.
y
+=
dtOverMass
*
force
.
y
;
velocity
.
z
+=
dtOverMass
*
force
.
z
;
#ifndef DeltaShake
apos
.
x
+=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
+=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
+=
velocity
.
z
*
cSim
.
deltaT
;
#else
apos
.
x
=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
=
velocity
.
z
*
cSim
.
deltaT
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kVerletUpdatePart1CM_kernel
()
{
extern
__shared__
float3
sCM
[];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
float4
CM1
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
// Read CM outputs from previous step
unsigned
int
cpos
=
threadIdx
.
x
;
while
(
cpos
<
gridDim
.
x
)
{
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
CM
.
z
+=
CM1
.
z
;
cpos
+=
blockDim
.
x
;
}
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
__syncthreads
();
// Reduce CM
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
while
(
pos
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float
dtOverMass
=
cSim
.
deltaT
*
velocity
.
w
;
// Include versions of the kernels with and with center of mass motion removal.
cSim
.
pOldPosq
[
pos
]
=
apos
;
velocity
.
x
+=
dtOverMass
*
force
.
x
-
sCM
[
0
].
x
;
velocity
.
y
+=
dtOverMass
*
force
.
y
-
sCM
[
0
].
y
;
velocity
.
z
+=
dtOverMass
*
force
.
z
-
sCM
[
0
].
z
;
#ifndef DeltaShake
apos
.
x
+=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
+=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
+=
velocity
.
z
*
cSim
.
deltaT
;
#else
apos
.
x
=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
=
velocity
.
z
*
cSim
.
deltaT
;
#endif
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
#include "kVerletUpdate.h"
#define REMOVE_CM
#include "kVerletUpdate.h"
void
kVerletUpdatePart1
(
gpuContext
gpu
)
{
...
...
@@ -166,105 +73,6 @@ void kVerletUpdatePart1(gpuContext gpu)
}
}
__global__
void
kVerletUpdatePart2_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
#ifndef DeltaShake
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
-
apos
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
-
apos
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
-
apos
.
z
);
#else
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
#endif
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kVerletUpdatePart2CM_kernel
()
{
extern
__shared__
float3
sCM
[];
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
float3
CM
=
{
0.0
f
,
0.0
f
,
0.0
f
};
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
float
mass
=
1.0
f
/
velocity
.
w
;
#ifndef DeltaShake
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
-
apos
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
-
apos
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
-
apos
.
z
);
#else
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
#endif
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0.0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
}
void
kVerletUpdatePart2
(
gpuContext
gpu
)
{
// printf("kVerletUpdatePart2\n");
...
...
platforms/cuda/src/kernels/kVerletUpdate.h
0 → 100644
View file @
3749e84b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for Verlet integration. It is included
* several times in kVerletUpdate.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef REMOVE_CM
__global__
void
kVerletUpdatePart1CM_kernel
()
#else
__global__
void
kVerletUpdatePart1_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
float4
CM1
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
// Read CM outputs from previous step
unsigned
int
cpos
=
threadIdx
.
x
;
while
(
cpos
<
gridDim
.
x
)
{
CM1
=
cSim
.
pLinearMomentum
[
cpos
];
CM
.
x
+=
CM1
.
x
;
CM
.
y
+=
CM1
.
y
;
CM
.
z
+=
CM1
.
z
;
cpos
+=
blockDim
.
x
;
}
sCM
[
threadIdx
.
x
].
x
=
CM
.
x
;
sCM
[
threadIdx
.
x
].
y
=
CM
.
y
;
sCM
[
threadIdx
.
x
].
z
=
CM
.
z
;
__syncthreads
();
// Reduce CM
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
force
=
cSim
.
pForce4
[
pos
];
float
dtOverMass
=
cSim
.
deltaT
*
velocity
.
w
;
cSim
.
pOldPosq
[
pos
]
=
apos
;
velocity
.
x
+=
dtOverMass
*
force
.
x
;
velocity
.
y
+=
dtOverMass
*
force
.
y
;
velocity
.
z
+=
dtOverMass
*
force
.
z
;
#ifdef REMOVE_CM
velocity
.
x
-=
sCM
[
0
].
x
;
velocity
.
y
-=
sCM
[
0
].
y
;
velocity
.
z
-=
sCM
[
0
].
z
;
#endif
apos
.
x
=
velocity
.
x
*
cSim
.
deltaT
;
apos
.
y
=
velocity
.
y
*
cSim
.
deltaT
;
apos
.
z
=
velocity
.
z
*
cSim
.
deltaT
;
cSim
.
pPosqP
[
pos
]
=
apos
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
#ifdef REMOVE_CM
__global__
void
kVerletUpdatePart2CM_kernel
()
#else
__global__
void
kVerletUpdatePart2_kernel
()
#endif
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
#ifdef REMOVE_CM
extern
__shared__
float3
sCM
[];
float3
CM
=
{
0
.
0
f
,
0
.
0
f
,
0
.
0
f
};
#endif
while
(
pos
<
cSim
.
atoms
)
{
float4
velocity
=
cSim
.
pVelm4
[
pos
];
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
xPrime
=
cSim
.
pPosqP
[
pos
];
velocity
.
x
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
x
);
velocity
.
y
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
y
);
velocity
.
z
=
cSim
.
oneOverDeltaT
*
(
xPrime
.
z
);
xPrime
.
x
+=
apos
.
x
;
xPrime
.
y
+=
apos
.
y
;
xPrime
.
z
+=
apos
.
z
;
#ifdef REMOVE_CM
float
mass
=
1
.
0
f
/
velocity
.
w
;
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
#endif
cSim
.
pPosq
[
pos
]
=
xPrime
;
cSim
.
pVelm4
[
pos
]
=
velocity
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
#ifdef REMOVE_CM
// Scale CM
CM
.
x
*=
cSim
.
inverseTotalMass
;
CM
.
y
*=
cSim
.
inverseTotalMass
;
CM
.
z
*=
cSim
.
inverseTotalMass
;
sCM
[
threadIdx
.
x
]
=
CM
;
__syncthreads
();
// Reduce CM for CTA
unsigned
int
offset
=
1
;
unsigned
int
mask
=
1
;
while
(
offset
<
blockDim
.
x
)
{
if
(((
threadIdx
.
x
&
mask
)
==
0
)
&&
(
threadIdx
.
x
+
offset
<
blockDim
.
x
))
{
sCM
[
threadIdx
.
x
].
x
+=
sCM
[
threadIdx
.
x
+
offset
].
x
;
sCM
[
threadIdx
.
x
].
y
+=
sCM
[
threadIdx
.
x
+
offset
].
y
;
sCM
[
threadIdx
.
x
].
z
+=
sCM
[
threadIdx
.
x
+
offset
].
z
;
}
mask
=
2
*
mask
+
1
;
offset
*=
2
;
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
float4
CM
;
CM
.
x
=
sCM
[
0
].
x
;
CM
.
y
=
sCM
[
0
].
y
;
CM
.
z
=
sCM
[
0
].
z
;
CM
.
w
=
0
.
0
f
;
cSim
.
pLinearMomentum
[
blockIdx
.
x
]
=
CM
;
}
#endif
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment