Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
799d4b1f
Unverified
Commit
799d4b1f
authored
May 14, 2019
by
Andy Simmonett
Browse files
Nose-Hoover chain propagation works on CUDA
parent
6278ef5f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
197 additions
and
8 deletions
+197
-8
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+125
-8
platforms/cuda/src/kernels/noseHooverChain.cu
platforms/cuda/src/kernels/noseHooverChain.cu
+70
-0
No files found.
platforms/cuda/include/CudaKernels.h
View file @
799d4b1f
...
@@ -1762,7 +1762,9 @@ public:
...
@@ -1762,7 +1762,9 @@ public:
private:
private:
CudaContext
&
cu
;
CudaContext
&
cu
;
CudaArray
scaleFactor
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
CUfunction
>
propagateKernels
;
std
::
map
<
int
,
CUfunction
>
propagateKernels
;
CUfunction
computeHeatBathEnergyKernel
;
};
};
/**
/**
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
799d4b1f
...
@@ -8353,28 +8353,145 @@ void CudaNoseHooverChainKernel::initialize() {
...
@@ -8353,28 +8353,145 @@ void CudaNoseHooverChainKernel::initialize() {
cu.setAsCurrent();
cu.setAsCurrent();
map<string, string> defines;
map<string, string> defines;
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1} {";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}
)
{";
defines["END_YS_LOOP"] = "}";
defines["END_YS_LOOP"] = "}";
CUmodule module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "");
CUmodule module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "
-std=c++11
");
propagateKernels[1] = cu.getKernel(module, "propagateNoseHooverChain");
propagateKernels[1] = cu.getKernel(module, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.828981543588751, -0.657963087177502, 0.828981543588751}
{";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.828981543588751, -0.657963087177502, 0.828981543588751}
)
{";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "");
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "
-std=c++11
");
propagateKernels[3] = cu.getKernel(module, "propagateNoseHooverChain");
propagateKernels[3] = cu.getKernel(module, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}
{";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}
)
{";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "");
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "
-std=c++11
");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy");
}
}
double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep) {
double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, double kineticEnergy, double timeStep) {
cu.setAsCurrent();
cu.setAsCurrent();
return 1;
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength();
int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getDefaultNumMultiTimeSteps();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (chainID >= chainState.size()) chainState.resize(chainID+1);
if (!chainState[chainID].isInitialized() || chainState[chainID].getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState[chainID].initialize<double2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<double2> zeros(chainLength, {0.0, 0.0});
chainState[chainID].upload(zeros);
} else {
chainState[chainID].initialize<float2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<float2> zeros(chainLength, {0.0, 0.0});
chainState[chainID].upload(zeros);
}
}
if (!scaleFactor.isInitialized() ||scaleFactor.getSize() == 0) {
if(useDouble){
std::vector<double> one(1);
scaleFactor.initialize<double>(cu, 1, "scaleFactor");
scaleFactor.upload(one);
} else {
std::vector<float> one(1);
scaleFactor.initialize<float>(cu, 1, "scaleFactor");
scaleFactor.upload(one);
}
}
if (!chainForces.isInitialized() || !chainMasses.isInitialized() || chainForces.getSize() < chainLength || chainMasses.getSize() < chainLength) {
if(useDouble){
std::vector<double> zeros(chainLength,0);
chainMasses.initialize<double>(cu, chainLength, "chainMasses");
chainForces.initialize<double>(cu, chainLength, "chainForces");
chainMasses.upload(zeros);
chainForces.upload(zeros);
} else {
std::vector<float> zeros(chainLength,0);
chainMasses.initialize<float>(cu, chainLength, "chainMasses");
chainForces.initialize<float>(cu, chainLength, "chainForces");
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
}
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float timeStepFloat = (float) timeStep;
float frequencyFloat = (float) frequency;
float kineticEnergyFloat = (float) kineticEnergy;
void *args[] = {&scaleFactor.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainLength, &numMTS, &numDOFs,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat,
useDouble ? (void*) &kineticEnergy : (void*) &kineticEnergyFloat, &chainState[chainID].getDevicePointer()};
if (numYS == 1 || numYS == 3 || numYS == 5) {
cu.executeKernel(propagateKernels[numYS], args, 1);
} else {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
return 0;
}
}
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
cu.setAsCurrent();
cu.setAsCurrent();
return 1;
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (chainID >= chainState.size() || !chainState[chainID].isInitialized() || chainState[chainID].getSize() != chainLength) {
return 0.0;
}
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if(useDouble){
std::vector<double> one(1);
heatBathEnergy.initialize<double>(cu, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
} else {
std::vector<float> one(1);
heatBathEnergy.initialize<float>(cu, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
}
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[chainID].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1);
if (useDouble){
std::vector<double> energy(1);
heatBathEnergy.download(energy);
return energy[0];
} else {
std::vector<float> energy(1);
heatBathEnergy.download(energy);
return (double) energy[0];
}
}
}
double CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain) {
double CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain) {
...
...
platforms/cuda/src/kernels/noseHooverChain.cu
0 → 100644
View file @
799d4b1f
#include <initializer_list>
/**
* Propagate the Nose-Hoover chain with one yoshida-suzuki term
*/
extern
"C"
__global__
void
propagateNoseHooverChain
(
real
*
__restrict__
scaleFactor
,
real
*
__restrict__
chainMasses
,
real
*
__restrict__
chainForces
,
int
chainLength
,
int
numMTS
,
int
numDOFs
,
float
timeStep
,
real
kT
,
float
frequency
,
real
kineticEnergy
,
real2
*
__restrict__
chainData
){
real
&
scale
=
scaleFactor
[
0
];
scale
=
(
real
)
1
;
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
chainMasses
[
bead
]
=
kT
/
(
frequency
*
frequency
);
chainMasses
[
0
]
*=
numDOFs
;
real
KE2
=
2.0
f
*
kineticEnergy
;
real
timeOverMTS
=
timeStep
/
numMTS
;
chainForces
[
0
]
=
(
KE2
-
numDOFs
*
kT
)
/
chainMasses
[
0
];
for
(
int
bead
=
0
;
bead
<
chainLength
-
1
;
++
bead
)
{
chainForces
[
bead
+
1
]
=
(
chainMasses
[
bead
]
*
chainData
[
bead
].
y
*
chainData
[
bead
].
y
-
kT
)
/
chainMasses
[
bead
+
1
];
}
for
(
int
mts
=
0
;
mts
<
numMTS
;
++
mts
)
{
BEGIN_YS_LOOP
real
wdt
=
ys
*
timeOverMTS
;
chainData
[
chainLength
-
1
].
y
+=
0.25
f
*
wdt
*
chainForces
[
chainLength
-
1
];
for
(
int
bead
=
chainLength
-
2
;
bead
>=
0
;
--
bead
)
{
real
aa
=
exp
(
-
0.125
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
chainData
[
bead
].
y
*
aa
+
0.25
f
*
wdt
*
chainForces
[
bead
]);
}
// update particle velocities
real
aa
=
exp
(
-
0.5
f
*
wdt
*
chainData
[
0
].
y
);
scale
*=
aa
;
// update the thermostat positions
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
{
chainData
[
bead
].
x
+=
0.5
f
*
chainData
[
bead
].
y
*
wdt
;
}
// update the forces
chainForces
[
0
]
=
(
scale
*
scale
*
KE2
-
numDOFs
*
kT
)
/
chainMasses
[
0
];
// update thermostat velocities
for
(
int
bead
=
0
;
bead
<
chainLength
-
1
;
++
bead
)
{
real
aa
=
exp
(
-
0.125
f
*
wdt
*
chainData
[
bead
+
1
].
y
);
chainData
[
bead
].
y
=
aa
*
(
aa
*
chainData
[
bead
].
y
+
0.25
f
*
wdt
*
chainForces
[
bead
]);
chainForces
[
bead
+
1
]
=
(
chainMasses
[
bead
]
*
chainData
[
bead
].
y
*
chainData
[
bead
].
y
-
kT
)
/
chainMasses
[
bead
+
1
];
}
chainData
[
chainLength
-
1
].
y
+=
0.25
f
*
wdt
*
chainForces
[
chainLength
-
1
];
END_YS_LOOP
}
// MTS loop
//printf("SCALE %f\n", scale);
}
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
extern
"C"
__global__
void
computeHeatBathEnergy
(
real
*
__restrict__
heatBathEnergy
,
int
chainLength
,
int
numDOFs
,
real
kT
,
float
frequency
,
real2
*
__restrict__
chainData
){
real
&
energy
=
heatBathEnergy
[
0
];
energy
=
(
real
)
0
;
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
real
prefac
=
i
?
1
:
numDOFs
;
real
mass
=
prefac
*
kT
/
(
frequency
*
frequency
);
real
velocity
=
chainData
[
i
].
y
;
// The kinetic energy of this bead
energy
+=
0.5
f
*
mass
*
velocity
*
velocity
;
// The potential energy of this bead
real
position
=
chainData
[
i
].
x
;
energy
+=
prefac
*
kT
*
position
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment