Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cca3e11a
Unverified
Commit
cca3e11a
authored
Aug 27, 2019
by
Andy Simmonett
Browse files
Correct CUDA NHC implementation
parent
50efef13
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
370 additions
and
201 deletions
+370
-201
openmmapi/src/NoseHooverIntegrator.cpp
openmmapi/src/NoseHooverIntegrator.cpp
+3
-10
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+12
-9
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+0
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+176
-96
platforms/cuda/src/kernels/noseHooverChain.cu
platforms/cuda/src/kernels/noseHooverChain.cu
+100
-31
platforms/cuda/src/kernels/utilities.cu
platforms/cuda/src/kernels/utilities.cu
+1
-1
platforms/reference/src/ReferenceKernels.cpp
platforms/reference/src/ReferenceKernels.cpp
+68
-44
plugins/drude/tests/TestDrudeNoseHoover.h
plugins/drude/tests/TestDrudeNoseHoover.h
+10
-9
No files found.
openmmapi/src/NoseHooverIntegrator.cpp
View file @
cca3e11a
...
@@ -121,7 +121,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
...
@@ -121,7 +121,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
int
numForces
=
system
.
getNumForces
();
int
numForces
=
system
.
getNumForces
();
if
(
thermostatedPairs
.
size
()
==
0
){
// remove 3 degrees of freedom from thermostats that act on absolute motions
if
(
thermostatedPairs
.
size
()
==
0
){
// remove 3 degrees of freedom from thermostats that act on absolute motions
for
(
int
forceNum
=
0
;
forceNum
<
numForces
;
++
forceNum
)
{
for
(
int
forceNum
=
0
;
forceNum
<
numForces
;
++
forceNum
)
{
//
if (dynamic_cast<CMMotionRemover*>(&system.getForce(forceNum))) nDOF -= 3;
if
(
dynamic_cast
<
CMMotionRemover
*>
(
&
system
.
getForce
(
forceNum
)))
nDOF
-=
3
;
}
}
}
}
...
@@ -143,11 +143,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
...
@@ -143,11 +143,7 @@ int NoseHooverIntegrator::addSubsystemThermostat(System& system, const std::vect
}
}
// create and add new chain
// create and add new chain
int
chainID
=
0
;
int
chainID
=
noseHooverChains
.
size
();
for
(
const
auto
&
c
:
noseHooverChains
)
{
// We need to account for the fact that a NHC with pairs thermostated has both a relative and an absolute chain
chainID
+=
c
.
getThermostatedPairs
().
size
()
?
2
:
1
;
}
auto
nhc
=
NoseHooverChain
(
temperature
,
relativeTemperature
,
collisionFrequency
,
relativeCollisionFrequency
,
auto
nhc
=
NoseHooverChain
(
temperature
,
relativeTemperature
,
collisionFrequency
,
relativeCollisionFrequency
,
nDOF
,
chainLength
,
numMTS
,
numYoshidaSuzuki
,
chainID
,
thermostatedParticles
,
thermostatedPairs
);
nDOF
,
chainLength
,
numMTS
,
numYoshidaSuzuki
,
chainID
,
thermostatedParticles
,
thermostatedPairs
);
noseHooverChains
.
push_back
(
nhc
);
noseHooverChains
.
push_back
(
nhc
);
...
@@ -232,10 +228,7 @@ double NoseHooverIntegrator::computeKineticEnergy() {
...
@@ -232,10 +228,7 @@ double NoseHooverIntegrator::computeKineticEnergy() {
double
kE
=
0.0
;
double
kE
=
0.0
;
if
(
noseHooverChains
.
size
())
{
if
(
noseHooverChains
.
size
())
{
for
(
const
auto
&
nhc
:
noseHooverChains
){
for
(
const
auto
&
nhc
:
noseHooverChains
){
if
(
nhc
.
getThermostatedPairs
().
size
()
==
0
)
{
kE
+=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
true
).
first
;
auto
KEs
=
nhcKernel
.
getAs
<
NoseHooverChainKernel
>
().
computeMaskedKineticEnergy
(
*
context
,
nhc
,
true
);
kE
+=
std
::
get
<
0
>
(
KEs
);
}
}
}
}
else
{
}
else
{
kE
=
vvKernel
.
getAs
<
IntegrateVelocityVerletStepKernel
>
().
computeKineticEnergy
(
*
context
,
*
this
);
kE
=
vvKernel
.
getAs
<
IntegrateVelocityVerletStepKernel
>
().
computeKineticEnergy
(
*
context
,
*
this
);
...
...
platforms/cuda/include/CudaKernels.h
View file @
cca3e11a
...
@@ -1731,11 +1731,11 @@ public:
...
@@ -1731,11 +1731,11 @@ public:
*
*
* @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnerg
y
the kineticEnergy of the particles being thermostated by this chain.
* @param kineticEnerg
ies
the
{absolute, relative}
kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @param timeStep the time step used by the integrator.
* @return the velocity scale factor to apply to the particles associated with this heat bath.
* @return the
{absolute, relative}
velocity scale factor to apply to the particles associated with this heat bath.
*/
*/
virtual
double
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
double
kineticEnerg
y
,
double
timeStep
);
virtual
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnerg
ies
,
double
timeStep
);
/**
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
*
...
@@ -1753,26 +1753,29 @@ public:
...
@@ -1753,26 +1753,29 @@ public:
* @param downloadValue whether the computed value should be downloaded and returned.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*
*/
*/
virtual
double
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
virtual
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
/**
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
*
* @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactor the multiplicative factor by which velocities are scaled.
* @param scaleFactor
s
the
{absolute, relative}
multiplicative factor by which velocities are scaled.
*/
*/
virtual
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
double
scaleFactor
);
virtual
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactor
s
);
private:
private:
int
sumWorkGroupSize
;
CudaContext
&
cu
;
CudaContext
&
cu
;
CudaArray
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
CudaArray
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
CudaArray
>
mask
s
;
std
::
map
<
int
,
CudaArray
>
atomlists
,
pairlist
s
;
std
::
map
<
int
,
CUfunction
>
propagateKernels
;
std
::
map
<
int
,
CUfunction
>
propagateKernels
;
CUfunction
reduceEnergyKernel
;
CUfunction
reduceEnergyKernel
;
CUfunction
computeHeatBathEnergyKernel
;
CUfunction
computeHeatBathEnergyKernel
;
CUfunction
computeMaskedKineticEnergyKernel
;
CUfunction
computeAtomsKineticEnergyKernel
;
CUfunction
scaleVelocitiesKernel
;
CUfunction
computePairsKineticEnergyKernel
;
CUfunction
scaleAtomsVelocitiesKernel
;
CUfunction
scalePairsVelocitiesKernel
;
};
};
/**
/**
...
...
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
cca3e11a
...
@@ -808,7 +808,6 @@ double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) {
...
@@ -808,7 +808,6 @@ double CudaIntegrationUtilities::computeKineticEnergy(double timeShift) {
energy
+=
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
)
/
v
.
w
;
energy
+=
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
)
/
v
.
w
;
}
}
}
}
// Restore the velocities.
// Restore the velocities.
if
(
timeShift
!=
0
)
if
(
timeShift
!=
0
)
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
cca3e11a
...
@@ -8358,6 +8358,8 @@ void CudaNoseHooverChainKernel::initialize() {
...
@@ -8358,6 +8358,8 @@ void CudaNoseHooverChainKernel::initialize() {
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
map<string, string> defines;
map<string, string> defines;
sumWorkGroupSize = 512;
defines["WORK_GROUP_SIZE"] = cu.intToString(sumWorkGroupSize);
defines["MIXEDEXP"] = useDouble ? "exp" : "expf";
defines["MIXEDEXP"] = useDouble ? "exp" : "expf";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}) {";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {1}) {";
defines["END_YS_LOOP"] = "}";
defines["END_YS_LOOP"] = "}";
...
@@ -8369,56 +8371,50 @@ void CudaNoseHooverChainKernel::initialize() {
...
@@ -8369,56 +8371,50 @@ void CudaNoseHooverChainKernel::initialize() {
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}){";
defines["BEGIN_YS_LOOP"] = "for(const real & ys : {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}){";
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
propagateKernels[5] = cu.getKernel(module, "propagateNoseHooverChain");
module = cu.createModule(CudaKernelSources::noseHooverChain, defines, "-std=c++11");
reduceEnergyKernel = cu.getKernel(module, "reduceEnergyPair");
computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy");
computeHeatBathEnergyKernel = cu.getKernel(module, "computeHeatBathEnergy");
compute
Masked
KineticEnergyKernel = cu.getKernel(module, "compute
Masked
KineticEnergy");
compute
Atoms
KineticEnergyKernel = cu.getKernel(module, "compute
Atoms
KineticEnergy");
scaleVelocities
Kernel = cu.getKernel(module, "
scaleVelocities
");
computePairsKineticEnergy
Kernel = cu.getKernel(module, "
computePairsKineticEnergy
");
CUmodule utilities = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::util
ities);
scaleAtomsVelocitiesKernel = cu.getKernel(module, "scaleAtomsVeloc
ities
"
);
reduceEnergy
Kernel = cu.getKernel(
utilities, "reduceEnergy
");
scalePairsVelocities
Kernel = cu.getKernel(
module, "scalePairsVelocities
");
}
}
double
CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc,
double
kineticEnerg
y
, double timeStep) {
std::pair<double, double>
CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc,
std::pair<double, double>
kineticEnerg
ies
, double timeStep) {
cu.setAsCurrent();
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int chainID = nhc.getDefaultChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
int chainLength = nhc.getDefaultChainLength();
int chainLength = nhc.getDefaultChainLength();
int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps();
int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getDefaultNumMultiTimeSteps();
int numMTS = nhc.getDefaultNumMultiTimeSteps();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
double frequency = nhc.getDefaultCollisionFrequency();
double relativeTemperature = nhc.getDefaultRelativeTemperature();
double relativeFrequency = nhc.getDefaultRelativeCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (numYS != 1 && numYS != 3 && numYS != 5) {
if (!chainState.count(chainID)) {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
chainState[chainID] = CudaArray();
}
if (chainState.at(chainID).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(chainID).initialize<double2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(chainID).upload(zeros.data());
} else {
chainState.at(chainID).initialize<float2>(cu, chainLength, "chainState" + std::to_string(chainID));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(chainID).upload(zeros.data());
}
}
}
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (!scaleFactorBuffer.isInitialized() ||scaleFactorBuffer.getSize() == 0) {
if (!scaleFactorBuffer.isInitialized() ||scaleFactorBuffer.getSize() == 0) {
if(useDouble){
if(useDouble){
std::vector<double>
one(1)
;
std::vector<double
2
>
zeros{{0,0}}
;
scaleFactorBuffer.initialize<double>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.initialize<double
2
>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(
one
);
scaleFactorBuffer.upload(
zeros
);
} else {
} else {
std::vector<float>
one(1)
;
std::vector<float
2
>
zeros{{0,0}}
;
scaleFactorBuffer.initialize<float>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.initialize<float
2
>(cu, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(
one
);
scaleFactorBuffer.upload(
zeros
);
}
}
}
}
std::vector<double> zeros(chainLength,0);
std::vector<double> zeros(chainLength,0);
if (!chainForces.isInitialized() || !chainMasses.isInitialized() ){
if (!chainForces.isInitialized() || !chainMasses.isInitialized() ){
if(useDouble){
if(useDouble){
...
@@ -8435,28 +8431,73 @@ double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const Nos
...
@@ -8435,28 +8431,73 @@ double CudaNoseHooverChainKernel::propagateChain(ContextImpl& context, const Nos
}
}
if (chainForces.getSize() < chainLength) chainMasses.resize(chainLength);
if (chainForces.getSize() < chainLength) chainMasses.resize(chainLength);
if (chainMasses.getSize() < chainLength) chainMasses.resize(chainLength);
if (chainMasses.getSize() < chainLength) chainMasses.resize(chainLength);
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float timeStepFloat = (float) timeStep;
float timeStepFloat = (float) timeStep;
float frequencyFloat = (float) frequency;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
void *args[] = {
if (nAtoms) {
&chainState[chainID].getDevicePointer(),
if (!chainState.count(2*chainID)) chainState[2*chainID] = CudaArray();
if (chainState.at(2*chainID).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(2*chainID).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
} else {
chainState.at(2*chainID).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID).upload(zeros.data());
}
}
int chainType = 0;
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void *args[] = {
&chainState[2*chainID].getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainLength, &numMTS, &numDOFs,
&chainType,
&chainLength, &numMTS, &numDOFs,
&timeStepFloat,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat
&frequencyFloat
};
};
if (numYS == 1 || numYS == 3 || numYS == 5) {
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
} else {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
}
return 0;
if (nPairs) {
if (!chainState.count(2*chainID+1)) chainState[2*chainID+1] = CudaArray();
if (chainState.at(2*chainID+1).getSize() != chainLength) {
// We need to upload the CUDA array
if(useDouble){
chainState.at(2*chainID+1).initialize<double2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<double2> zeros(chainLength, make_double2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
} else {
chainState.at(2*chainID+1).initialize<float2>(cu, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<float2> zeros(chainLength, make_float2(0, 0));
chainState.at(2*chainID+1).upload(zeros.data());
}
}
int chainType = 1;
double kT = BOLTZ * relativeTemperature;
int ndf = 3*nPairs;
float kTfloat = (float) kT;
float frequencyFloat = (float) relativeFrequency;
void *args[] = {
&chainState[2*chainID+1].getDevicePointer(),
&kineticEnergyBuffer.getDevicePointer(),
&scaleFactorBuffer.getDevicePointer(),
&chainMasses.getDevicePointer(),
&chainForces.getDevicePointer(),
&chainType, &chainLength, &numMTS, &ndf,
&timeStepFloat,
useDouble ? (void*) &kT : (void*) &kTfloat,
&frequencyFloat
};
cu.executeKernel(propagateKernels[numYS], args, 1, 1);
}
return {0, 0};
}
}
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
...
@@ -8466,14 +8507,17 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
...
@@ -8466,14 +8507,17 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
int chainID = nhc.getDefaultChainID();
int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength();
int chainLength = nhc.getDefaultChainLength();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
auto & chainState = cu.getIntegrationUtilities().getNoseHooverChainState();
if (chainID >= chainState.size() || !chainState[chainID].isInitialized() || chainState[chainID].getSize() != chainLength) {
return 0.0;
bool absChainIsValid = chainState.count(2*chainID) != 0 &&
}
chainState[2*chainID].isInitialized() &&
chainState[2*chainID].getSize() == chainLength;
bool relChainIsValid = chainState.count(2*chainID+1) != 0 &&
chainState[2*chainID+1].isInitialized() &&
chainState[2*chainID+1].getSize() == chainLength;
if (!absChainIsValid && !relChainIsValid) return 0.0;
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if(useDouble){
if(useDouble){
...
@@ -8487,15 +8531,35 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
...
@@ -8487,15 +8531,35 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
}
}
}
}
double kT = BOLTZ * temperature;
cu.clearBuffer(heatBathEnergy);
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
if (absChainIsValid) {
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[chainID].getDevicePointer()};
&frequencyFloat, &chainState[2*chainID].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
if (relChainIsValid) {
int numDOFs = 3 * nhc.getThermostatedPairs().size();
double temperature = nhc.getDefaultRelativeTemperature();
double frequency = nhc.getDefaultRelativeCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
void * args[] = {&heatBathEnergy.getDevicePointer(), &chainLength, &numDOFs,
useDouble ? (void*) &kT : (void*) & kTfloat,
&frequencyFloat, &chainState[2*chainID+1].getDevicePointer()};
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
}
cu.executeKernel(computeHeatBathEnergyKernel, args, 1, 1);
void * pinnedBuffer = cu.getPinnedBuffer();
void * pinnedBuffer = cu.getPinnedBuffer();
heatBathEnergy.download(pinnedBuffer);
heatBathEnergy.download(pinnedBuffer);
...
@@ -8506,78 +8570,94 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
...
@@ -8506,78 +8570,94 @@ double CudaNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, co
}
}
}
}
double
CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
std::pair<double, double>
CudaNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
cu.setAsCurrent();
cu.setAsCurrent();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int chainID = nhc.getDefaultChainID();
const auto & parents = nhc.getThermostatedPairs();
const auto & nhcAtoms = nhc.getThermostatedAtoms();
const auto & thermostatedAtoms = nhc.getThermostatedAtoms();
const auto & nhcPairs = nhc.getThermostatedPairs();
auto nAtoms = cu.getPaddedNumAtoms();
auto nAtoms = nhcAtoms.size();
if (!masks.count(chainID)) {
auto nPairs = nhcPairs.size();
// We need to upload the CUDA array
if (nAtoms) {
std::vector<int> maskVec(nAtoms);
if (!atomlists.count(chainID)) {
for (int i = 0; i < nAtoms; ++i) maskVec[i] = i;
// We need to upload the CUDA array
atomlists[chainID] = CudaArray();
if (parents.size() ) {
atomlists[chainID].initialize<int>(cu, nAtoms, "atomlist" + std::to_string(chainID));
assert(parents.size() == thermostatedAtoms.size());
atomlists[chainID].upload(nhcAtoms);
for ( size_t i = 0; i < parents.size(); ++i) {
}
int atom = thermostatedAtoms[i];
if (atomlists[chainID].getSize() != nAtoms) {
maskVec[atom] = parents[i];
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
} else {
}
for ( const auto & atom : thermostatedAtoms) {
if (nPairs) {
maskVec[atom] = -1L;
if (!pairlists.count(chainID)) {
}
// We need to upload the CUDA array
pairlists[chainID] = CudaArray();
pairlists[chainID].initialize<int2>(cu, nPairs, "pairlist" + std::to_string(chainID));
std::vector<int2> int2vec;
for(const auto &p : nhcPairs) int2vec.push_back(make_int2(p.first, p.second));
pairlists[chainID].upload(int2vec);
}
if (pairlists[chainID].getSize() != nPairs) {
throw OpenMMException("Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
masks[chainID] = CudaArray();
masks[chainID].initialize<int>(cu, nAtoms, "mask" + std::to_string(chainID));
masks[chainID].upload(maskVec);
}
if (masks[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if(useDouble){
if(useDouble){
std::vector<double>
one(1)
;
std::vector<double
2
>
zeros{{0,0}}
;
kineticEnergyBuffer.initialize<double>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.initialize<double
2
>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(
one
);
kineticEnergyBuffer.upload(
zeros
);
} else {
} else {
std::vector<float>
one(1)
;
std::vector<float
2
>
zeros{{0,0}}
;
kineticEnergyBuffer.initialize<float>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.initialize<float
2
>(cu, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(
one
);
kineticEnergyBuffer.upload(
zeros
);
}
}
}
}
cu.clearBuffer(cu.getEnergyBuffer());
cu.clearBuffer(cu.getEnergyBuffer());
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nAtoms, &cu.getVelm().getDevicePointer(), &masks[chainID].getDevicePointer()};
if (nAtoms) {
cu.executeKernel(computeMaskedKineticEnergyKernel, args, nAtoms);
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(computeAtomsKineticEnergyKernel, args, nAtoms);
}
if (nPairs) {
void *args[] = {&cu.getEnergyBuffer().getDevicePointer(),&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(computePairsKineticEnergyKernel, args, nPairs);
}
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
//taken from CudaContext::reduceEnergy(); the final kinetic energy will live in the kineticEnergy buffer
int bufferSize = cu.getEnergyBuffer().getSize();
int bufferSize = cu.getEnergyBuffer().getSize() / 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
int workGroupSize = 512;
void* args2[] = {&cu.getEnergyBuffer().getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &sumWorkGroupSize};
void* args2[] = {&cu.getEnergyBuffer().getDevicePointer(), &kineticEnergyBuffer.getDevicePointer(), &bufferSize, &workGroupSize};
cu.executeKernel(reduceEnergyKernel, args2, sumWorkGroupSize, sumWorkGroupSize, 2*sumWorkGroupSize*cu.getEnergyBuffer().getElementSize());
cu.executeKernel(reduceEnergyKernel, args2, workGroupSize, workGroupSize, workGroupSize*cu.getEnergyBuffer().getElementSize());
double value = 0
;
std::pair<double, double> KEs = {0, 0}
;
if (downloadValue) {
if (downloadValue) {
void * pinnedBuffer = cu.getPinnedBuffer();
void * pinnedBuffer = cu.getPinnedBuffer();
kineticEnergyBuffer.download(pinnedBuffer);
kineticEnergyBuffer.download(pinnedBuffer);
value = useDouble ? *((double*) pinnedBuffer) : *((float*) pinnedBuffer);
KEs.first = useDouble ? *((double*) pinnedBuffer) : *((float*) pinnedBuffer);
KEs.second = useDouble ? *((double*) pinnedBuffer + 1) : *((float*) pinnedBuffer + 1);
}
}
return
value
;
return
KEs
;
}
}
void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc,
double
scaleFactor) {
void CudaNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc,
std::pair<double, double>
scaleFactor) {
// For now we assume that the
mask
info is valid, because compute
Masked
KineticEnergy must have been
// For now we assume that the
atoms and pairs
info is valid, because compute
{Atoms|Pairs}
KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
cu.setAsCurrent();
cu.setAsCurrent();
auto nAtoms = cu.getPaddedNumAtoms();
int chainID = nhc.getDefaultChainID();
int chainID = nhc.getDefaultChainID();
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
auto nAtoms = nhc.getThermostatedAtoms().size();
&nAtoms, &cu.getVelm().getDevicePointer(), &masks[chainID].getDevicePointer()};
auto nPairs = nhc.getThermostatedPairs().size();
cu.executeKernel(scaleVelocitiesKernel, args, nAtoms);
if(nAtoms) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nAtoms, &cu.getVelm().getDevicePointer(), &atomlists[chainID].getDevicePointer()};
cu.executeKernel(scaleAtomsVelocitiesKernel, args, nAtoms);
}
if(nPairs) {
void *args[] = {&scaleFactorBuffer.getDevicePointer(),
&nPairs, &cu.getVelm().getDevicePointer(), &pairlists[chainID].getDevicePointer()};
cu.executeKernel(scalePairsVelocitiesKernel, args, nPairs);
}
}
}
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
...
...
platforms/cuda/src/kernels/noseHooverChain.cu
View file @
cca3e11a
#include <initializer_list>
#include <initializer_list>
extern
"C"
__global__
void
propagateNoseHooverChain
(
mixed2
*
__restrict__
chainData
,
const
mixed
*
__restrict__
energySum
,
mixed
*
__restrict__
scaleFactor
,
extern
"C"
__global__
void
propagateNoseHooverChain
(
mixed2
*
__restrict__
chainData
,
const
mixed
2
*
__restrict__
energySum
,
mixed
2
*
__restrict__
scaleFactor
,
mixed
*
__restrict__
chainMasses
,
mixed
*
__restrict__
chainForces
,
mixed
*
__restrict__
chainMasses
,
mixed
*
__restrict__
chainForces
,
int
chainLength
,
int
numMTS
,
int
numDOFs
,
float
timeStep
,
int
chainType
,
int
chainLength
,
int
numMTS
,
int
numDOFs
,
float
timeStep
,
mixed
kT
,
float
frequency
){
mixed
kT
,
float
frequency
){
mixed
&
scale
=
scaleFactor
[
0
];
const
mixed
&
kineticEnergy
=
chainType
?
energySum
[
0
].
y
:
energySum
[
0
].
x
;
mixed
&
scale
=
chainType
?
scaleFactor
[
0
].
y
:
scaleFactor
[
0
].
x
;
scale
=
(
mixed
)
1
;
scale
=
(
mixed
)
1
;
const
mixed
&
kineticEnergy
=
energySum
[
0
];
if
(
kineticEnergy
<
1e-8
)
return
;
if
(
kineticEnergy
<
1e-8
)
return
;
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
chainMasses
[
bead
]
=
kT
/
(
frequency
*
frequency
);
for
(
int
bead
=
0
;
bead
<
chainLength
;
++
bead
)
chainMasses
[
bead
]
=
kT
/
(
frequency
*
frequency
);
chainMasses
[
0
]
*=
numDOFs
;
chainMasses
[
0
]
*=
numDOFs
;
...
@@ -49,11 +49,10 @@ extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainDa
...
@@ -49,11 +49,10 @@ extern "C" __global__ void propagateNoseHooverChain(mixed2* __restrict__ chainDa
/**
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
*/
extern
"C"
__global__
void
computeHeatBathEnergy
(
mixed
*
__restrict__
heatBathEnergy
,
int
chainLength
,
int
numDOFs
,
extern
"C"
__global__
void
computeHeatBathEnergy
(
mixed
*
__restrict__
heatBathEnergy
,
int
chainLength
,
int
numDOFs
,
mixed
kT
,
float
frequency
,
const
mixed2
*
__restrict__
chainData
){
mixed
kT
,
float
frequency
,
const
mixed2
*
__restrict__
chainData
){
// Note that this is always incremented; make sure it's zeroed properly before the first call
mixed
&
energy
=
heatBathEnergy
[
0
];
mixed
&
energy
=
heatBathEnergy
[
0
];
energy
=
(
mixed
)
0
;
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
mixed
prefac
=
i
?
1
:
numDOFs
;
mixed
prefac
=
i
?
1
:
numDOFs
;
...
@@ -67,35 +66,105 @@ extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEne
...
@@ -67,35 +66,105 @@ extern "C" __global__ void computeHeatBathEnergy(mixed* __restrict__ heatBathEne
}
}
}
}
extern
"C"
__global__
void
compute
Masked
KineticEnergy
(
mixed
*
__restrict__
energyBuffer
,
int
paddedN
umAtoms
,
extern
"C"
__global__
void
compute
Atoms
KineticEnergy
(
mixed
2
*
__restrict__
energyBuffer
,
int
n
umAtoms
,
const
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
mask
){
const
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
atoms
){
mixed
energy
=
0
;
mixed
2
energy
=
make_mixed2
(
0
,
0
)
;
//energy = 1; return;
//energy = 1; return;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
paddedNumAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
v
=
velm
[
index
];
int
atom
=
atoms
[
index
];
mixed4
v
=
velm
[
atom
];
mixed
mass
=
v
.
w
==
0
?
0
:
1
/
v
.
w
;
mixed
mass
=
v
.
w
==
0
?
0
:
1
/
v
.
w
;
if
(
mask
[
index
]
>=
0
){
energy
.
x
+=
0.5
f
*
mass
*
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
);
const
mixed4
&
vparent
=
velm
[
mask
[
index
]];
mixed
massp
=
vparent
.
w
==
0
?
0
:
1
/
vparent
.
w
;
mass
=
(
massp
+
mass
)
==
0
?
0
:
(
massp
*
mass
)
/
(
massp
+
mass
);
v
.
x
-=
vparent
.
x
;
v
.
y
-=
vparent
.
y
;
v
.
z
-=
vparent
.
z
;
}
energy
+=
0.5
f
*
mass
*
(
v
.
x
*
v
.
x
+
v
.
y
*
v
.
y
+
v
.
z
*
v
.
z
);
}
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
energy
;
}
}
extern
"C"
__global__
void
scaleVelocities
(
mixed
*
__restrict__
scaleFactor
,
int
paddedNumAtoms
,
extern
"C"
__global__
void
computePairsKineticEnergy
(
mixed2
*
__restrict__
energyBuffer
,
int
numPairs
,
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
mask
){
const
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairs
){
const
mixed
&
scale
=
scaleFactor
[
0
];
mixed2
energy
=
make_mixed2
(
0
,
0
);
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
paddedNumAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
&
v
=
velm
[
index
];
int2
pair
=
pairs
[
index
];
mixed4
vparent
=
mask
[
index
]
>=
0
?
velm
[
mask
[
index
]]
:
make_mixed4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
int
atom1
=
pair
.
x
;
mixed
maskedScale
=
mask
[
index
]
==
index
?
1
:
scale
;
int
atom2
=
pair
.
y
;
v
.
x
=
vparent
.
x
+
maskedScale
*
(
v
.
x
-
vparent
.
x
);
mixed4
v1
=
velm
[
atom1
];
v
.
y
=
vparent
.
y
+
maskedScale
*
(
v
.
y
-
vparent
.
y
);
mixed4
v2
=
velm
[
atom2
];
v
.
z
=
vparent
.
z
+
maskedScale
*
(
v
.
z
-
vparent
.
z
);
mixed
m1
=
v1
.
w
==
0
?
0
:
1
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0
?
0
:
1
/
v2
.
w
;
mixed4
cv
;
cv
.
x
=
(
m1
*
v1
.
x
+
m2
*
v2
.
x
)
/
(
m1
+
m2
);
cv
.
y
=
(
m1
*
v1
.
y
+
m2
*
v2
.
y
)
/
(
m1
+
m2
);
cv
.
z
=
(
m1
*
v1
.
z
+
m2
*
v2
.
z
)
/
(
m1
+
m2
);
mixed4
rv
;
rv
.
x
=
v2
.
x
-
v1
.
x
;
rv
.
y
=
v2
.
y
-
v1
.
y
;
rv
.
z
=
v2
.
z
-
v1
.
z
;
energy
.
x
+=
0.5
f
*
(
m1
+
m2
)
*
(
cv
.
x
*
cv
.
x
+
cv
.
y
*
cv
.
y
+
cv
.
z
*
cv
.
z
);
energy
.
y
+=
0.5
f
*
(
m1
*
m2
/
(
m1
+
m2
))
*
(
rv
.
x
*
rv
.
x
+
rv
.
y
*
rv
.
y
+
rv
.
z
*
rv
.
z
);
}
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
].
x
+=
energy
.
x
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
].
y
+=
energy
.
y
;
}
extern
"C"
__global__
void
scaleAtomsVelocities
(
mixed2
*
__restrict__
scaleFactor
,
int
numAtoms
,
mixed4
*
__restrict__
velm
,
const
int
*
__restrict__
atoms
){
const
mixed
&
scale
=
scaleFactor
[
0
].
x
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
atoms
[
index
];
mixed4
&
v
=
velm
[
atom
];
v
.
x
*=
scale
;
v
.
y
*=
scale
;
v
.
z
*=
scale
;
}
}
extern
"C"
__global__
void
scalePairsVelocities
(
mixed2
*
__restrict__
scaleFactor
,
int
numPairs
,
mixed4
*
__restrict__
velm
,
const
int2
*
__restrict__
pairs
){
const
mixed
&
absScale
=
scaleFactor
[
0
].
x
;
const
mixed
&
relScale
=
scaleFactor
[
0
].
y
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numPairs
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom1
=
pairs
[
index
].
x
;
int
atom2
=
pairs
[
index
].
y
;
mixed4
&
v1
=
velm
[
atom1
];
mixed4
&
v2
=
velm
[
atom2
];
mixed
m1
=
v1
.
w
==
0
?
0
:
1
/
v1
.
w
;
mixed
m2
=
v2
.
w
==
0
?
0
:
1
/
v2
.
w
;
mixed4
cv
;
cv
.
x
=
(
m1
*
v1
.
x
+
m2
*
v2
.
x
)
/
(
m1
+
m2
);
cv
.
y
=
(
m1
*
v1
.
y
+
m2
*
v2
.
y
)
/
(
m1
+
m2
);
cv
.
z
=
(
m1
*
v1
.
z
+
m2
*
v2
.
z
)
/
(
m1
+
m2
);
mixed4
rv
;
rv
.
x
=
v2
.
x
-
v1
.
x
;
rv
.
y
=
v2
.
y
-
v1
.
y
;
rv
.
z
=
v2
.
z
-
v1
.
z
;
v1
.
x
=
absScale
*
cv
.
x
-
relScale
*
rv
.
x
*
m2
/
(
m1
+
m2
);
v1
.
y
=
absScale
*
cv
.
y
-
relScale
*
rv
.
y
*
m2
/
(
m1
+
m2
);
v1
.
z
=
absScale
*
cv
.
z
-
relScale
*
rv
.
z
*
m2
/
(
m1
+
m2
);
v2
.
x
=
absScale
*
cv
.
x
+
relScale
*
rv
.
x
*
m1
/
(
m1
+
m2
);
v2
.
y
=
absScale
*
cv
.
y
+
relScale
*
rv
.
y
*
m1
/
(
m1
+
m2
);
v2
.
z
=
absScale
*
cv
.
z
+
relScale
*
rv
.
z
*
m1
/
(
m1
+
m2
);
}
}
/**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications
*/
extern
"C"
__global__
void
reduceEnergyPair
(
const
mixed2
*
__restrict__
energyBuffer
,
mixed2
*
__restrict__
result
,
int
bufferSize
,
int
workGroupSize
)
{
__shared__
mixed2
tempBuffer
[
WORK_GROUP_SIZE
];
const
unsigned
int
thread
=
threadIdx
.
x
;
mixed2
sum
=
make_mixed2
(
0
,
0
);
for
(
unsigned
int
idx
=
thread
;
idx
<
bufferSize
;
idx
+=
blockDim
.
x
)
{
sum
.
x
+=
energyBuffer
[
idx
].
x
;
sum
.
y
+=
energyBuffer
[
idx
].
y
;
}
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
workGroupSize
;
i
*=
2
)
{
__syncthreads
();
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
workGroupSize
)
{
tempBuffer
[
thread
].
x
+=
tempBuffer
[
thread
+
i
].
x
;
tempBuffer
[
thread
].
y
+=
tempBuffer
[
thread
+
i
].
y
;
}
}
}
if
(
thread
==
0
)
*
result
=
tempBuffer
[
0
];
}
}
platforms/cuda/src/kernels/utilities.cu
View file @
cca3e11a
...
@@ -99,4 +99,4 @@ __global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq,
...
@@ -99,4 +99,4 @@ __global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq,
for
(
int
i
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
for
(
int
i
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
posq
[
i
].
w
=
charges
[
atomOrder
[
i
]];
posq
[
i
].
w
=
charges
[
atomOrder
[
i
]];
}
}
}
}
\ No newline at end of file
platforms/reference/src/ReferenceKernels.cpp
View file @
cca3e11a
...
@@ -2477,54 +2477,58 @@ std::pair<double, double> ReferenceNoseHooverChainKernel::propagateChain(Context
...
@@ -2477,54 +2477,58 @@ std::pair<double, double> ReferenceNoseHooverChainKernel::propagateChain(Context
// Get the variables describing the NHC
// Get the variables describing the NHC
int
chainLength
=
nhc
.
getDefaultChainLength
();
int
chainLength
=
nhc
.
getDefaultChainLength
();
int
chainID
=
nhc
.
getDefaultChainID
();
int
chainID
=
nhc
.
getDefaultChainID
();
double
temperature
=
nhc
.
getDefaultTemperature
();
double
collisionFrequency
=
nhc
.
getDefaultCollisionFrequency
();
int
numDOFs
=
nhc
.
getDefaultNumDegreesOfFreedom
();
int
numDOFs
=
nhc
.
getDefaultNumDegreesOfFreedom
();
int
numMTS
=
nhc
.
getDefaultNumMultiTimeSteps
();
int
numMTS
=
nhc
.
getDefaultNumMultiTimeSteps
();
// Get the state of the NHC from the context
// Get the state of the NHC from the context
auto
&
allChainPositions
=
extractNoseHooverPositions
(
context
);
auto
&
allChainPositions
=
extractNoseHooverPositions
(
context
);
auto
&
allChainVelocities
=
extractNoseHooverVelocities
(
context
);
auto
&
allChainVelocities
=
extractNoseHooverVelocities
(
context
);
if
(
allChainPositions
.
size
()
<
chainID
+
1
){
allChainPositions
.
resize
(
chainID
+
1
);
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
}
double
absScale
=
0
;
if
(
allChainVelocities
.
size
()
<
chainID
+
1
){
if
(
nAtoms
)
{
allChainVelocities
.
resize
(
chainID
+
1
);
if
(
allChainPositions
.
size
()
<
2
*
chainID
+
1
){
}
allChainPositions
.
resize
(
2
*
chainID
+
1
);
auto
&
chainPositions
=
allChainPositions
.
at
(
chainID
);
}
auto
&
chainVelocities
=
allChainVelocities
.
at
(
chainID
);
if
(
allChainVelocities
.
size
()
<
2
*
chainID
+
1
){
if
(
chainPositions
.
size
()
<
chainLength
){
allChainVelocities
.
resize
(
2
*
chainID
+
1
);
chainPositions
.
resize
(
chainLength
,
0
);
}
}
auto
&
chainPositions
=
allChainPositions
.
at
(
2
*
chainID
);
if
(
chainVelocities
.
size
()
<
chainLength
){
auto
&
chainVelocities
=
allChainVelocities
.
at
(
2
*
chainID
);
chainVelocities
.
resize
(
chainLength
,
0
);
if
(
chainPositions
.
size
()
<
chainLength
){
chainPositions
.
resize
(
chainLength
,
0
);
}
if
(
chainVelocities
.
size
()
<
chainLength
){
chainVelocities
.
resize
(
chainLength
,
0
);
}
double
temperature
=
nhc
.
getDefaultTemperature
();
double
collisionFrequency
=
nhc
.
getDefaultCollisionFrequency
();
absScale
=
chainPropagator
->
propagate
(
absKE
,
chainVelocities
,
chainPositions
,
numDOFs
,
temperature
,
collisionFrequency
,
timeStep
,
numMTS
,
nhc
.
getDefaultYoshidaSuzukiWeights
());
}
}
double
absScale
=
chainPropagator
->
propagate
(
absKE
,
chainVelocities
,
chainPositions
,
numDOFs
,
temperature
,
collisionFrequency
,
timeStep
,
numMTS
,
nhc
.
getDefaultYoshidaSuzukiWeights
());
double
relScale
=
0
;
double
relScale
=
0
;
int
numPairs
=
nhc
.
getThermostatedPairs
().
size
();
int
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
if
(
numPairs
){
if
(
nPairs
)
{
double
relativeTemperature
=
nhc
.
getDefaultRelativeTemperature
();
if
(
allChainPositions
.
size
()
<
2
*
chainID
+
2
){
double
relativeCollisionFrequency
=
nhc
.
getDefaultRelativeCollisionFrequency
();
allChainPositions
.
resize
(
2
*
chainID
+
2
);
if
(
allChainPositions
.
size
()
<
chainID
+
2
){
allChainPositions
.
resize
(
chainID
+
2
);
}
}
if
(
allChainVelocities
.
size
()
<
chainID
+
2
){
if
(
allChainVelocities
.
size
()
<
2
*
chainID
+
2
){
allChainVelocities
.
resize
(
chainID
+
2
);
allChainVelocities
.
resize
(
2
*
chainID
+
2
);
}
}
auto
&
chainPositions
=
allChainPositions
.
at
(
chainID
+
1
);
auto
&
chainPositions
=
allChainPositions
.
at
(
2
*
chainID
+
1
);
auto
&
chainVelocities
=
allChainVelocities
.
at
(
chainID
+
1
);
auto
&
chainVelocities
=
allChainVelocities
.
at
(
2
*
chainID
+
1
);
if
(
chainPositions
.
size
()
<
chainLength
){
if
(
chainPositions
.
size
()
<
chainLength
){
chainPositions
.
resize
(
chainLength
,
0
);
chainPositions
.
resize
(
chainLength
,
0
);
}
}
if
(
chainVelocities
.
size
()
<
chainLength
){
if
(
chainVelocities
.
size
()
<
chainLength
){
chainVelocities
.
resize
(
chainLength
,
0
);
chainVelocities
.
resize
(
chainLength
,
0
);
}
}
relScale
=
chainPropagator
->
propagate
(
relKE
,
chainVelocities
,
chainPositions
,
3
*
numPairs
,
double
temperature
=
nhc
.
getDefaultRelativeTemperature
();
relativeTemperature
,
relativeCollisionFrequency
,
timeStep
,
double
collisionFrequency
=
nhc
.
getDefaultRelativeCollisionFrequency
();
relScale
=
chainPropagator
->
propagate
(
relKE
,
chainVelocities
,
chainPositions
,
3
*
nPairs
,
temperature
,
collisionFrequency
,
timeStep
,
numMTS
,
nhc
.
getDefaultYoshidaSuzukiWeights
());
numMTS
,
nhc
.
getDefaultYoshidaSuzukiWeights
());
}
}
return
{
absScale
,
relScale
};
return
{
absScale
,
relScale
};
}
}
...
@@ -2534,21 +2538,41 @@ double ReferenceNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& contex
...
@@ -2534,21 +2538,41 @@ double ReferenceNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& contex
double
kineticEnergy
=
0
;
double
kineticEnergy
=
0
;
int
chainLength
=
nhc
.
getDefaultChainLength
();
int
chainLength
=
nhc
.
getDefaultChainLength
();
int
chainID
=
nhc
.
getDefaultChainID
();
int
chainID
=
nhc
.
getDefaultChainID
();
double
temperature
=
nhc
.
getDefaultTemperature
();
int
nAtoms
=
nhc
.
getThermostatedAtoms
().
size
();
double
collisionFrequency
=
nhc
.
getDefaultCollisionFrequency
();
int
nPairs
=
nhc
.
getThermostatedPairs
().
size
();
double
kT
=
temperature
*
BOLTZ
;
int
numDOFs
=
nhc
.
getDefaultNumDegreesOfFreedom
();
auto
&
nhcPositions
=
extractNoseHooverPositions
(
context
);
auto
&
nhcPositions
=
extractNoseHooverPositions
(
context
);
auto
&
nhcVelocities
=
extractNoseHooverVelocities
(
context
);
auto
&
nhcVelocities
=
extractNoseHooverVelocities
(
context
);
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
if
(
nAtoms
)
{
double
prefac
=
i
?
1
:
numDOFs
;
double
temperature
=
nhc
.
getDefaultTemperature
();
double
mass
=
prefac
*
kT
/
(
collisionFrequency
*
collisionFrequency
);
double
collisionFrequency
=
nhc
.
getDefaultCollisionFrequency
();
double
velocity
=
nhcVelocities
[
chainID
][
i
];
double
kT
=
temperature
*
BOLTZ
;
// The kinetic energy of this bead
int
numDOFs
=
nhc
.
getDefaultNumDegreesOfFreedom
();
kineticEnergy
+=
0.5
*
mass
*
velocity
*
velocity
;
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
// The potential energy of this bead
double
prefac
=
i
?
1
:
numDOFs
;
double
position
=
nhcPositions
[
chainID
][
i
];
double
mass
=
prefac
*
kT
/
(
collisionFrequency
*
collisionFrequency
);
potentialEnergy
+=
prefac
*
kT
*
position
;
double
velocity
=
nhcVelocities
[
2
*
chainID
][
i
];
// The kinetic energy of this bead
kineticEnergy
+=
0.5
*
mass
*
velocity
*
velocity
;
// The potential energy of this bead
double
position
=
nhcPositions
[
2
*
chainID
][
i
];
potentialEnergy
+=
prefac
*
kT
*
position
;
}
}
if
(
nPairs
)
{
double
temperature
=
nhc
.
getDefaultRelativeTemperature
();
double
collisionFrequency
=
nhc
.
getDefaultRelativeCollisionFrequency
();
double
kT
=
temperature
*
BOLTZ
;
int
numDOFs
=
3
*
nPairs
;
for
(
int
i
=
0
;
i
<
chainLength
;
++
i
)
{
double
prefac
=
i
?
1
:
numDOFs
;
double
mass
=
prefac
*
kT
/
(
collisionFrequency
*
collisionFrequency
);
double
velocity
=
nhcVelocities
[
2
*
chainID
+
1
][
i
];
// The kinetic energy of this bead
kineticEnergy
+=
0.5
*
mass
*
velocity
*
velocity
;
// The potential energy of this bead
double
position
=
nhcPositions
[
2
*
chainID
+
1
][
i
];
potentialEnergy
+=
prefac
*
kT
*
position
;
}
}
}
return
kineticEnergy
+
potentialEnergy
;
return
kineticEnergy
+
potentialEnergy
;
}
}
...
...
plugins/drude/tests/TestDrudeNoseHoover.h
View file @
cca3e11a
...
@@ -117,8 +117,8 @@ void testWaterBox(Platform& platform){
...
@@ -117,8 +117,8 @@ void testWaterBox(Platform& platform){
int
chainLength
=
4
;
int
chainLength
=
4
;
int
numMTS
=
3
;
int
numMTS
=
3
;
int
numYS
=
3
;
int
numYS
=
3
;
double
frequency
=
5
0.0
;
double
frequency
=
10
0.0
;
double
frequencyDrude
=
25
.0
;
double
frequencyDrude
=
80
.0
;
int
randomSeed
=
100
;
int
randomSeed
=
100
;
DrudeNoseHooverIntegrator
integ
(
0.0005
,
system
,
temperature
,
temperatureDrude
,
DrudeNoseHooverIntegrator
integ
(
0.0005
,
system
,
temperature
,
temperatureDrude
,
frequency
,
frequencyDrude
,
chainLength
,
numMTS
,
numYS
);;
frequency
,
frequencyDrude
,
chainLength
,
numMTS
,
numYS
);;
...
@@ -138,12 +138,12 @@ void testWaterBox(Platform& platform){
...
@@ -138,12 +138,12 @@ void testWaterBox(Platform& platform){
context
.
applyConstraints
(
1e-6
);
context
.
applyConstraints
(
1e-6
);
// Equilibrate.
// Equilibrate.
integ
.
step
(
50
00
);
integ
.
step
(
8
00
);
// Compute the internal and center of mass temperatures.
// Compute the internal and center of mass temperatures.
double
totalKE
=
0
;
double
totalKE
=
0
;
int
numSteps
=
1000
00
;
const
int
numSteps
=
8
00
;
double
meanTemp
=
0.0
;
double
meanTemp
=
0.0
;
double
meanDrudeTemp
=
0.0
;
double
meanDrudeTemp
=
0.0
;
double
meanConserved
=
0.0
;
double
meanConserved
=
0.0
;
...
@@ -154,7 +154,6 @@ void testWaterBox(Platform& platform){
...
@@ -154,7 +154,6 @@ void testWaterBox(Platform& platform){
double
PE
=
state
.
getPotentialEnergy
();
double
PE
=
state
.
getPotentialEnergy
();
double
fullKE
=
integ
.
computeTotalKineticEnergy
();
double
fullKE
=
integ
.
computeTotalKineticEnergy
();
double
drudeKE
=
integ
.
computeDrudeKineticEnergy
();
double
drudeKE
=
integ
.
computeDrudeKineticEnergy
();
KE
=
fullKE
-
drudeKE
;
double
temp
=
KE
/
(
0.5
*
numStandardDof
*
BOLTZ
);
double
temp
=
KE
/
(
0.5
*
numStandardDof
*
BOLTZ
);
double
drudeTemp
=
drudeKE
/
(
0.5
*
numDrudeDof
*
BOLTZ
);
double
drudeTemp
=
drudeKE
/
(
0.5
*
numDrudeDof
*
BOLTZ
);
meanTemp
=
(
i
*
meanTemp
+
temp
)
/
(
i
+
1
);
meanTemp
=
(
i
*
meanTemp
+
temp
)
/
(
i
+
1
);
...
@@ -162,7 +161,7 @@ void testWaterBox(Platform& platform){
...
@@ -162,7 +161,7 @@ void testWaterBox(Platform& platform){
double
heatBathEnergy
=
integ
.
computeHeatBathEnergy
();
double
heatBathEnergy
=
integ
.
computeHeatBathEnergy
();
double
conserved
=
PE
+
fullKE
+
heatBathEnergy
;
double
conserved
=
PE
+
fullKE
+
heatBathEnergy
;
meanConserved
=
(
i
*
meanConserved
+
conserved
)
/
(
i
+
1
);
meanConserved
=
(
i
*
meanConserved
+
conserved
)
/
(
i
+
1
);
#define DEBUG
1
#define DEBUG
0
#if DEBUG
#if DEBUG
if
(
i
%
10
==
0
)
if
(
i
%
10
==
0
)
std
::
cout
<<
std
::
setw
(
6
)
<<
i
std
::
cout
<<
std
::
setw
(
6
)
<<
i
...
@@ -170,15 +169,17 @@ void testWaterBox(Platform& platform){
...
@@ -170,15 +169,17 @@ void testWaterBox(Platform& platform){
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
drudeKE
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
drudeKE
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
meanTemp
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
meanTemp
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
meanDrudeTemp
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
meanDrudeTemp
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
heatBathEnergy
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
fullKE
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
conserved
<<
std
::
setprecision
(
8
)
<<
std
::
setw
(
16
)
<<
conserved
<<
std
::
endl
;
<<
std
::
endl
;
#endif
#endif
totalKE
+=
KE
;
totalKE
+=
KE
;
//
ASSERT(fabs(meanConserved - conserved) < 0.
5
);
ASSERT
(
fabs
(
meanConserved
-
conserved
)
<
0.
2
);
}
}
totalKE
/=
numSteps
;
totalKE
/=
numSteps
;
//
ASSERT_USUALLY_EQUAL_TOL(temperature, meanTemp, 0.0
2
);
ASSERT_USUALLY_EQUAL_TOL
(
temperature
,
meanTemp
,
0.0
3
);
//
ASSERT_USUALLY_EQUAL_TOL(temperatureDrude, meanDrudeTemp, 0.
1
0);
ASSERT_USUALLY_EQUAL_TOL
(
temperatureDrude
,
meanDrudeTemp
,
0.0
3
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment