Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
98f73120
Unverified
Commit
98f73120
authored
Oct 25, 2019
by
Andy Simmonett
Browse files
Implement NHC for OpenCL platform
parent
80e1b66f
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
1112 additions
and
2 deletions
+1112
-2
platforms/opencl/include/OpenCLIntegrationUtilities.h
platforms/opencl/include/OpenCLIntegrationUtilities.h
+8
-0
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+104
-0
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+44
-0
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+4
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+418
-2
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+2
-0
platforms/opencl/src/kernels/diffcucl.txt
platforms/opencl/src/kernels/diffcucl.txt
+128
-0
platforms/opencl/src/kernels/noseHooverChain.cl
platforms/opencl/src/kernels/noseHooverChain.cl
+170
-0
platforms/opencl/src/kernels/velocityVerlet.cl
platforms/opencl/src/kernels/velocityVerlet.cl
+101
-0
platforms/opencl/tests/TestOpenCLNoseHooverIntegrator.cpp
platforms/opencl/tests/TestOpenCLNoseHooverIntegrator.cpp
+36
-0
platforms/opencl/tests/TestOpenCLNoseHooverThermostat.cpp
platforms/opencl/tests/TestOpenCLNoseHooverThermostat.cpp
+36
-0
plugins/drude/platforms/opencl/tests/CMakeLists.txt
plugins/drude/platforms/opencl/tests/CMakeLists.txt
+1
-0
plugins/drude/platforms/opencl/tests/TestOpenCLDrudeNoseHoover.cpp
...rude/platforms/opencl/tests/TestOpenCLDrudeNoseHoover.cpp
+60
-0
No files found.
platforms/opencl/include/OpenCLIntegrationUtilities.h
View file @
98f73120
...
...
@@ -31,6 +31,7 @@
#include "OpenCLContext.h"
#include "windowsExportOpenCL.h"
#include <iosfwd>
#include <map>
namespace
OpenMM
{
...
...
@@ -119,6 +120,12 @@ public:
* @param timeShift the amount by which to shift the velocities in time
*/
double
computeKineticEnergy
(
double
timeShift
);
/**
* Get the data structure that holds the state of all Nose-Hoover chains
*
* @return vector of chain states
*/
std
::
map
<
int
,
OpenCLArray
>&
getNoseHooverChainState
();
private:
void
applyConstraints
(
bool
constrainVelocities
,
double
tol
);
OpenCLContext
&
context
;
...
...
@@ -166,6 +173,7 @@ private:
mm_double2
lastStepSize
;
struct
ShakeCluster
;
struct
ConstraintOrderer
;
std
::
map
<
int
,
OpenCLArray
>
noseHooverChainState
;
};
}
// namespace OpenMM
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
98f73120
...
...
@@ -36,6 +36,7 @@
#include "openmm/kernels.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "openmm/internal/NoseHooverChain.h"
#include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h"
#include "openmm/System.h"
...
...
@@ -1346,6 +1347,43 @@ private:
cl
::
Kernel
kernel1
,
kernel2
;
};
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class
OpenCLIntegrateVelocityVerletStepKernel
:
public
IntegrateVelocityVerletStepKernel
{
public:
OpenCLIntegrateVelocityVerletStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateVelocityVerletStepKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLIntegrateVelocityVerletStepKernel
()
{}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
NoseHooverIntegrator
&
integrator
);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void
execute
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
,
bool
&
forcesAreValid
);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double
computeKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverIntegrator
&
integrator
);
private:
OpenCLContext
&
cl
;
cl
::
Kernel
kernel1
,
kernel2
,
kernel3
;
};
/**
* This kernel is invoked by LangevinIntegrator to take one time step.
*/
...
...
@@ -1667,6 +1705,72 @@ private:
cl
::
Kernel
kernel
;
};
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class
OpenCLNoseHooverChainKernel
:
public
NoseHooverChainKernel
{
public:
OpenCLNoseHooverChainKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
NoseHooverChainKernel
(
name
,
platform
),
cl
(
cl
)
{
}
~
OpenCLNoseHooverChainKernel
()
{}
/**
* Initialize the kernel.
*/
virtual
void
initialize
();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
virtual
std
::
pair
<
double
,
double
>
propagateChain
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
,
std
::
pair
<
double
,
double
>
kineticEnergies
,
double
timeStep
);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
virtual
double
computeHeatBathEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
nhc
);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
virtual
std
::
pair
<
double
,
double
>
computeMaskedKineticEnergy
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
bool
downloadValue
);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
virtual
void
scaleVelocities
(
ContextImpl
&
context
,
const
NoseHooverChain
&
noseHooverChain
,
std
::
pair
<
double
,
double
>
scaleFactors
);
private:
int
sumWorkGroupSize
;
OpenCLContext
&
cl
;
OpenCLArray
scaleFactorBuffer
,
kineticEnergyBuffer
,
chainMasses
,
chainForces
,
heatBathEnergy
;
std
::
map
<
int
,
OpenCLArray
>
atomlists
,
pairlists
;
std
::
map
<
int
,
cl
::
Kernel
>
propagateKernels
;
cl
::
Kernel
reduceEnergyKernel
;
cl
::
Kernel
computeHeatBathEnergyKernel
;
cl
::
Kernel
computeAtomsKineticEnergyKernel
;
cl
::
Kernel
computePairsKineticEnergyKernel
;
cl
::
Kernel
scaleAtomsVelocitiesKernel
;
cl
::
Kernel
scalePairsVelocitiesKernel
;
};
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
98f73120
...
...
@@ -848,6 +848,24 @@ int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) {
}
void
OpenCLIntegrationUtilities
::
createCheckpoint
(
ostream
&
stream
)
{
size_t
numChains
=
noseHooverChainState
.
size
();
bool
useDouble
=
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
();
stream
.
write
((
char
*
)
&
numChains
,
sizeof
(
size_t
));
for
(
auto
&
chainState
:
noseHooverChainState
){
int
chainID
=
chainState
.
first
;
size_t
chainLength
=
chainState
.
second
.
getSize
();
stream
.
write
((
char
*
)
&
chainID
,
sizeof
(
int
));
stream
.
write
((
char
*
)
&
chainLength
,
sizeof
(
size_t
));
if
(
useDouble
)
{
vector
<
mm_double2
>
stateVec
;
chainState
.
second
.
download
(
stateVec
);
stream
.
write
((
char
*
)
stateVec
.
data
(),
sizeof
(
mm_double2
)
*
chainLength
);
}
else
{
vector
<
mm_float2
>
stateVec
;
chainState
.
second
.
download
(
stateVec
);
stream
.
write
((
char
*
)
stateVec
.
data
(),
sizeof
(
mm_float2
)
*
chainLength
);
}
}
if
(
!
random
.
isInitialized
())
return
;
stream
.
write
((
char
*
)
&
randomPos
,
sizeof
(
int
));
...
...
@@ -860,6 +878,28 @@ void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) {
}
void
OpenCLIntegrationUtilities
::
loadCheckpoint
(
istream
&
stream
)
{
size_t
numChains
,
chainLength
;
bool
useDouble
=
context
.
getUseDoublePrecision
()
||
context
.
getUseMixedPrecision
();
stream
.
read
((
char
*
)
&
numChains
,
sizeof
(
size_t
));
noseHooverChainState
.
clear
();
for
(
size_t
i
=
0
;
i
<
numChains
;
i
++
){
int
chainID
;
stream
.
read
((
char
*
)
&
chainID
,
sizeof
(
int
));
stream
.
read
((
char
*
)
&
chainLength
,
sizeof
(
size_t
));
if
(
useDouble
)
{
noseHooverChainState
[
chainID
]
=
OpenCLArray
();
noseHooverChainState
[
chainID
].
initialize
<
mm_double2
>
(
context
,
chainLength
,
"chainState"
+
std
::
to_string
(
chainID
));
std
::
vector
<
mm_double2
>
stateVec
(
chainLength
);
stream
.
read
((
char
*
)
&
stateVec
[
0
],
sizeof
(
mm_double2
)
*
chainLength
);
noseHooverChainState
[
chainID
].
upload
(
stateVec
);
}
else
{
noseHooverChainState
[
chainID
]
=
OpenCLArray
();
noseHooverChainState
[
chainID
].
initialize
<
mm_float2
>
(
context
,
chainLength
,
"chainState"
+
std
::
to_string
(
chainID
));
std
::
vector
<
mm_float2
>
stateVec
(
chainLength
);
stream
.
read
((
char
*
)
&
stateVec
[
0
],
sizeof
(
mm_float2
)
*
chainLength
);
noseHooverChainState
[
chainID
].
upload
(
stateVec
);
}
}
if
(
!
random
.
isInitialized
())
return
;
stream
.
read
((
char
*
)
&
randomPos
,
sizeof
(
int
));
...
...
@@ -918,3 +958,7 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
posDelta
.
copyTo
(
context
.
getVelm
());
return
0.5
*
energy
;
}
std
::
map
<
int
,
OpenCLArray
>&
OpenCLIntegrationUtilities
::
getNoseHooverChainState
(){
return
noseHooverChainState
;
};
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
98f73120
...
...
@@ -128,6 +128,10 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
OpenCLIntegrateCustomStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
OpenCLApplyAndersenThermostatKernel
(
name
,
platform
,
cl
);
if
(
name
==
NoseHooverChainKernel
::
Name
())
return
new
OpenCLNoseHooverChainKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateVelocityVerletStepKernel
::
Name
())
return
new
OpenCLIntegrateVelocityVerletStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
return
new
OpenCLApplyMonteCarloBarostatKernel
(
name
,
platform
,
cl
);
if
(
name
==
RemoveCMMotionKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
98f73120
...
...
@@ -53,6 +53,7 @@
#include "SimTKOpenMMUtilities.h"
#include "jama_eig.h"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <iterator>
#include <set>
...
...
@@ -413,7 +414,7 @@ void OpenCLUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, co
}
void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
int version =
2
;
int version =
3
;
stream.write((char*) &version, sizeof(int));
int precision = (cl.getUseDoublePrecision() ? 2 : cl.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int));
...
...
@@ -444,7 +445,7 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
int version;
stream.read((char*) &version, sizeof(int));
if (version !=
2
)
if (version !=
3
)
throw OpenMMException("Checkpoint was created with a different version of OpenMM");
int precision;
stream.read((char*) &precision, sizeof(int));
...
...
@@ -7391,6 +7392,79 @@ double OpenCLIntegrateVerletStepKernel::computeKineticEnergy(ContextImpl& contex
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
void OpenCLIntegrateVelocityVerletStepKernel::initialize(const System& system, const NoseHooverIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
map<string, string> defines;
cl::Program program = cl.createProgram(OpenCLKernelSources::velocityVerlet, defines, "");
kernel1 = cl::Kernel(program, "integrateVelocityVerletPart1");
kernel2 = cl::Kernel(program, "integrateVelocityVerletPart2");
kernel3 = cl::Kernel(program, "integrateVelocityVerletPart3");
}
void OpenCLIntegrateVelocityVerletStepKernel::execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid) {
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
int numAtoms = cl.getNumAtoms();
int paddedNumAtoms = cl.getPaddedNumAtoms();
double dt = integrator.getStepSize();
cl.getIntegrationUtilities().setNextStepSize(dt);
if( !forcesAreValid ) context.calcForcesAndEnergy(true, false);
//// Call the first integration kernel.
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl_int>(1, paddedNumAtoms);
kernel1.setArg<cl::Buffer>(2, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel1, 4);
kernel1.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(7, integration.getPosDelta().getDeviceBuffer());
cl.executeKernel(kernel1, numAtoms);
//// Apply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
//// Call the second integration kernel.
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel2, 3);
kernel2.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
cl.executeKernel(kernel2, numAtoms);
integration.computeVirtualSites();
//// Update forces
context.calcForcesAndEnergy(true, false);
forcesAreValid = true;
//// Call the third integration kernel.
kernel3.setArg<cl_int>(0, numAtoms);
kernel3.setArg<cl_int>(1, paddedNumAtoms);
kernel3.setArg<cl::Buffer>(2, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel3, 4);
kernel3.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(7, integration.getPosDelta().getDeviceBuffer());
cl.executeKernel(kernel3, numAtoms);
integration.applyVelocityConstraints(integrator.getConstraintTolerance());
//// Update the time and step count.
cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
}
double OpenCLIntegrateVelocityVerletStepKernel::computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0);
}
void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
...
...
@@ -8682,6 +8756,348 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
kernel.setArg<cl_uint>(5, cl.getIntegrationUtilities().prepareRandomNumbers(cl.getPaddedNumAtoms()));
cl.executeKernel(kernel, cl.getNumAtoms());
}
void OpenCLNoseHooverChainKernel::initialize() {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
map<string, string> defines;
defines["BEGIN_YS_LOOP"] = "const real arr[1] = {1.0}; for(int i=0;i<1;++i) { const real ys = arr[i];";
defines["END_YS_LOOP"] = "}";
cl::Program program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
propagateKernels[1] = cl::Kernel(program, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "const real arr[3] = {0.828981543588751, -0.657963087177502, 0.828981543588751}; for(int i=0;i<3;++i) { const real ys = arr[i];";
program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
propagateKernels[3] = cl::Kernel(program, "propagateNoseHooverChain");
defines["BEGIN_YS_LOOP"] = "const real arr[5] = {0.2967324292201065, 0.2967324292201065, -0.186929716880426, 0.2967324292201065, 0.2967324292201065}; for(int i=0;i<5;++i) { const real ys = arr[i];";
program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
propagateKernels[5] = cl::Kernel(program, "propagateNoseHooverChain");
program = cl.createProgram(OpenCLKernelSources::noseHooverChain, defines);
reduceEnergyKernel = cl::Kernel(program, "reduceEnergyPair");
computeHeatBathEnergyKernel = cl::Kernel(program, "computeHeatBathEnergy");
computeAtomsKineticEnergyKernel = cl::Kernel(program, "computeAtomsKineticEnergy");
computePairsKineticEnergyKernel = cl::Kernel(program, "computePairsKineticEnergy");
scaleAtomsVelocitiesKernel = cl::Kernel(program, "scaleAtomsVelocities");
scalePairsVelocitiesKernel = cl::Kernel(program, "scalePairsVelocities");
}
std::pair<double, double> OpenCLNoseHooverChainKernel::propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep) {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int nAtoms = nhc.getThermostatedAtoms().size();
int nPairs = nhc.getThermostatedPairs().size();
int chainLength = nhc.getDefaultChainLength();
int numYS = nhc.getDefaultNumYoshidaSuzukiTimeSteps();
int numMTS = nhc.getDefaultNumMultiTimeSteps();
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
double relativeTemperature = nhc.getDefaultRelativeTemperature();
double relativeFrequency = nhc.getDefaultRelativeCollisionFrequency();
if (numYS != 1 && numYS != 3 && numYS != 5) {
throw OpenMMException("Number of Yoshida Suzuki time steps has to be 1, 3, or 5.");
}
auto & chainState = cl.getIntegrationUtilities().getNoseHooverChainState();
if (!scaleFactorBuffer.isInitialized() ||scaleFactorBuffer.getSize() == 0) {
if(useDouble){
std::vector<mm_double2> zeros{{0,0}};
scaleFactorBuffer.initialize<mm_double2>(cl, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
} else {
std::vector<mm_float2> zeros{{0,0}};
scaleFactorBuffer.initialize<mm_float2>(cl, 1, "scaleFactorBuffer");
scaleFactorBuffer.upload(zeros);
}
}
if (!chainForces.isInitialized() || !chainMasses.isInitialized() ){
if(useDouble){
std::vector<cl_double> zeros(chainLength,0);
chainMasses.initialize<cl_double>(cl, chainLength, "chainMasses");
chainForces.initialize<cl_double>(cl, chainLength, "chainForces");
chainMasses.upload(zeros);
chainForces.upload(zeros);
} else {
std::vector<cl_float> zeros(chainLength,0);
chainMasses.initialize<cl_float>(cl, chainLength, "chainMasses");
chainForces.initialize<cl_float>(cl, chainLength, "chainForces");
chainMasses.upload(zeros);
chainForces.upload(zeros);
}
}
if (chainForces.getSize() < chainLength) chainMasses.resize(chainLength);
if (chainMasses.getSize() < chainLength) chainMasses.resize(chainLength);
float timeStepFloat = (float) timeStep;
// N.B. We ignore the incoming kineticEnergy and grab it from the device buffer instead
if (nAtoms) {
if (!chainState.count(2*chainID)) chainState[2*chainID] = OpenCLArray();
if (chainState.at(2*chainID).getSize() != chainLength) {
// We need to upload the OpenCL array
if(useDouble){
chainState.at(2*chainID).initialize<mm_double2>(cl, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<mm_double2> zeros(chainLength, mm_double2(0.0, 0.0));
chainState.at(2*chainID).upload(zeros.data());
} else {
chainState.at(2*chainID).initialize<mm_float2>(cl, chainLength, "chainState" + std::to_string(2*chainID));
std::vector<mm_float2> zeros(chainLength, mm_float2(0.0f, 0.0f));
chainState.at(2*chainID).upload(zeros.data());
}
}
int chainType = 0;
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
propagateKernels[numYS].setArg<cl::Buffer>(0, chainState[2*chainID].getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(2, scaleFactorBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(3, chainMasses.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(4, chainForces.getDeviceBuffer());
propagateKernels[numYS].setArg<cl_int>(5, chainType);
propagateKernels[numYS].setArg<cl_int>(6, chainLength);
propagateKernels[numYS].setArg<cl_int>(7, numMTS);
propagateKernels[numYS].setArg<cl_int>(8, numDOFs);
propagateKernels[numYS].setArg<cl_float>(9, timeStepFloat);
if (useDouble)
propagateKernels[numYS].setArg<cl_double>(10, kT);
else
propagateKernels[numYS].setArg<cl_float>(10, kTfloat);
propagateKernels[numYS].setArg<cl_float>(11, frequencyFloat);
cl.executeKernel(propagateKernels[numYS], 1, 1);
}
if (nPairs) {
if (!chainState.count(2*chainID+1)) chainState[2*chainID+1] = OpenCLArray();
if (chainState.at(2*chainID+1).getSize() != chainLength) {
// We need to upload the OpenCL array
if(useDouble){
chainState.at(2*chainID+1).initialize<mm_double2>(cl, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<mm_double2> zeros(chainLength, mm_double2(0.0, 0.0));
chainState.at(2*chainID+1).upload(zeros.data());
} else {
chainState.at(2*chainID+1).initialize<mm_float2>(cl, chainLength, "chainState" + std::to_string(2*chainID+1));
std::vector<mm_float2> zeros(chainLength, mm_float2(0.0f, 0.0f));
chainState.at(2*chainID+1).upload(zeros.data());
}
}
int chainType = 1;
double kT = BOLTZ * relativeTemperature;
int ndf = 3*nPairs;
float kTfloat = (float) kT;
float frequencyFloat = (float) relativeFrequency;
propagateKernels[numYS].setArg<cl::Buffer>(0, chainState[2*chainID+1].getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(2, scaleFactorBuffer.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(3, chainMasses.getDeviceBuffer());
propagateKernels[numYS].setArg<cl::Buffer>(4, chainForces.getDeviceBuffer());
propagateKernels[numYS].setArg<cl_int>(5, chainType);
propagateKernels[numYS].setArg<cl_int>(6, chainLength);
propagateKernels[numYS].setArg<cl_int>(7, numMTS);
propagateKernels[numYS].setArg<cl_int>(8, ndf);
propagateKernels[numYS].setArg<cl_float>(9, timeStepFloat);
if (useDouble)
propagateKernels[numYS].setArg<cl_double>(10, kT);
else
propagateKernels[numYS].setArg<cl_float>(10, kTfloat);
propagateKernels[numYS].setArg<cl_float>(11, frequencyFloat);
cl.executeKernel(propagateKernels[numYS], 1, 1);
}
return {0, 0};
}
double OpenCLNoseHooverChainKernel::computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc) {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
int chainLength = nhc.getDefaultChainLength();
auto & chainState = cl.getIntegrationUtilities().getNoseHooverChainState();
bool absChainIsValid = chainState.count(2*chainID) != 0 &&
chainState[2*chainID].isInitialized() &&
chainState[2*chainID].getSize() == chainLength;
bool relChainIsValid = chainState.count(2*chainID+1) != 0 &&
chainState[2*chainID+1].isInitialized() &&
chainState[2*chainID+1].getSize() == chainLength;
if (!absChainIsValid && !relChainIsValid) return 0.0;
if (!heatBathEnergy.isInitialized() || heatBathEnergy.getSize() == 0) {
if(useDouble){
std::vector<cl_double> one(1);
heatBathEnergy.initialize<cl_double>(cl, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
} else {
std::vector<cl_float> one(1);
heatBathEnergy.initialize<cl_float>(cl, 1, "heatBathEnergy");
heatBathEnergy.upload(one);
}
}
cl.clearBuffer(heatBathEnergy);
if (absChainIsValid) {
int numDOFs = nhc.getDefaultNumDegreesOfFreedom();
double temperature = nhc.getDefaultTemperature();
double frequency = nhc.getDefaultCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
computeHeatBathEnergyKernel.setArg<cl::Buffer>(0, heatBathEnergy.getDeviceBuffer());
computeHeatBathEnergyKernel.setArg<cl_int>(1, chainLength);
computeHeatBathEnergyKernel.setArg<cl_int>(2, numDOFs);
if (useDouble)
computeHeatBathEnergyKernel.setArg<cl_double>(3, kT);
else
computeHeatBathEnergyKernel.setArg<cl_float>(3, kTfloat);
computeHeatBathEnergyKernel.setArg<cl_float>(4, frequencyFloat);
computeHeatBathEnergyKernel.setArg<cl::Buffer>(5, chainState[2*chainID].getDeviceBuffer());
cl.executeKernel(computeHeatBathEnergyKernel, 1, 1);
}
if (relChainIsValid) {
int numDOFs = 3 * nhc.getThermostatedPairs().size();
double temperature = nhc.getDefaultRelativeTemperature();
double frequency = nhc.getDefaultRelativeCollisionFrequency();
double kT = BOLTZ * temperature;
float kTfloat = (float) kT;
float frequencyFloat = (float) frequency;
computeHeatBathEnergyKernel.setArg<cl::Buffer>(0, heatBathEnergy.getDeviceBuffer());
computeHeatBathEnergyKernel.setArg<cl_int>(1, chainLength);
computeHeatBathEnergyKernel.setArg<cl_int>(2, numDOFs);
if (useDouble)
computeHeatBathEnergyKernel.setArg<cl_double>(3, kT);
else
computeHeatBathEnergyKernel.setArg<cl_float>(3, kTfloat);
computeHeatBathEnergyKernel.setArg<cl_float>(4, frequencyFloat);
computeHeatBathEnergyKernel.setArg<cl::Buffer>(5, chainState[2*chainID+1].getDeviceBuffer());
cl.executeKernel(computeHeatBathEnergyKernel, 1, 1);
}
void * pinnedBuffer = cl.getPinnedBuffer();
heatBathEnergy.download(pinnedBuffer);
if (useDouble){
return *((double*) pinnedBuffer);
} else {
return *((float*) pinnedBuffer);
}
}
std::pair<double, double> OpenCLNoseHooverChainKernel::computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &nhc, bool downloadValue) {
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
int chainID = nhc.getDefaultChainID();
const auto & nhcAtoms = nhc.getThermostatedAtoms();
const auto & nhcPairs = nhc.getThermostatedPairs();
auto nAtoms = nhcAtoms.size();
auto nPairs = nhcPairs.size();
if (nAtoms) {
if (!atomlists.count(chainID)) {
// We need to upload the OpenCL array
atomlists[chainID] = OpenCLArray();
atomlists[chainID].initialize<int>(cl, nAtoms, "atomlist" + std::to_string(chainID));
atomlists[chainID].upload(nhcAtoms);
}
if (atomlists[chainID].getSize() != nAtoms) {
throw OpenMMException("Number of atoms changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (nPairs) {
if (!pairlists.count(chainID)) {
// We need to upload the OpenCL array
pairlists[chainID] = OpenCLArray();
pairlists[chainID].initialize<mm_int2>(cl, nPairs, "pairlist" + std::to_string(chainID));
std::vector<mm_int2> int2vec;
for(const auto &p : nhcPairs) int2vec.push_back(mm_int2(p.first, p.second));
pairlists[chainID].upload(int2vec);
}
if (pairlists[chainID].getSize() != nPairs) {
throw OpenMMException("Number of thermostated pairs changed. Cannot be handled by the same Nose-Hoover thermostat.");
}
}
if (!kineticEnergyBuffer.isInitialized() || kineticEnergyBuffer.getSize() == 0) {
if(useDouble){
std::vector<mm_double2> zeros{{0,0}};
kineticEnergyBuffer.initialize<mm_double2>(cl, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
} else {
std::vector<mm_float2> zeros{{0,0}};
kineticEnergyBuffer.initialize<mm_float2>(cl, 1, "kineticEnergyBuffer");
kineticEnergyBuffer.upload(zeros);
}
}
cl.clearBuffer(cl.getEnergyBuffer());
if (nAtoms) {
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl_int>(1, nAtoms);
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computeAtomsKineticEnergyKernel.setArg<cl::Buffer>(3, atomlists[chainID].getDeviceBuffer());
cl.executeKernel(computeAtomsKineticEnergyKernel, nAtoms);
}
if (nPairs) {
computePairsKineticEnergyKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl_int>(1, nPairs);
computePairsKineticEnergyKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
computePairsKineticEnergyKernel.setArg<cl::Buffer>(3, pairlists[chainID].getDeviceBuffer());
cl.executeKernel(computePairsKineticEnergyKernel, nPairs);
}
int bufferSize = cl.getEnergyBuffer().getSize() / 2; // Halve it to account for the fact that we're storing mixed2 instead of mixed in there
int workGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (workGroupSize > 512)
workGroupSize = 512;
reduceEnergyKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(1, kineticEnergyBuffer.getDeviceBuffer());
reduceEnergyKernel.setArg<cl_int>(2, bufferSize);
reduceEnergyKernel.setArg<cl_int>(3, workGroupSize);
reduceEnergyKernel.setArg(4, 2*workGroupSize*cl.getEnergyBuffer().getElementSize(), NULL);
cl.executeKernel(reduceEnergyKernel, workGroupSize, workGroupSize);
std::pair<double, double> KEs = {0, 0};
if (downloadValue) {
if (useDouble) {
mm_double2 tmp;
kineticEnergyBuffer.download(&tmp);
KEs.first = tmp.x;
KEs.second = tmp.y;
} else {
mm_float2 tmp;
kineticEnergyBuffer.download(&tmp);
KEs.first = tmp.x;
KEs.second = tmp.y;
}
}
return KEs;
}
void OpenCLNoseHooverChainKernel::scaleVelocities(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> scaleFactor) {
// For now we assume that the atoms and pairs info is valid, because compute{Atoms|Pairs}KineticEnergy must have been
// called before this kernel. If that ever ceases to be true, some sanity checks are needed here.
int chainID = nhc.getDefaultChainID();
auto nAtoms = nhc.getThermostatedAtoms().size();
auto nPairs = nhc.getThermostatedPairs().size();
if(nAtoms) {
scaleAtomsVelocitiesKernel.setArg<cl::Buffer>(0, scaleFactorBuffer.getDeviceBuffer());
scaleAtomsVelocitiesKernel.setArg<cl_int>(1, nAtoms);
scaleAtomsVelocitiesKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
scaleAtomsVelocitiesKernel.setArg<cl::Buffer>(3, atomlists[chainID].getDeviceBuffer());
cl.executeKernel(scaleAtomsVelocitiesKernel, nAtoms);
}
if(nPairs) {
scalePairsVelocitiesKernel.setArg<cl::Buffer>(0, scaleFactorBuffer.getDeviceBuffer());
scalePairsVelocitiesKernel.setArg<cl_int>(1, nPairs);
scalePairsVelocitiesKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
scalePairsVelocitiesKernel.setArg<cl::Buffer>(3, pairlists[chainID].getDeviceBuffer());
cl.executeKernel(scalePairsVelocitiesKernel, nPairs);
}
}
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
98f73120
...
...
@@ -87,12 +87,14 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVelocityVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateBrownianStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVariableVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVariableLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateCustomStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyAndersenThermostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
NoseHooverChainKernel
::
Name
(),
factory
);
registerKernelFactory
(
ApplyMonteCarloBarostatKernel
::
Name
(),
factory
);
registerKernelFactory
(
RemoveCMMotionKernel
::
Name
(),
factory
);
platformProperties
.
push_back
(
OpenCLDeviceIndex
());
...
...
platforms/opencl/src/kernels/diffcucl.txt
0 → 100644
View file @
98f73120
2c2
< * Perform the first step of Verlet integration.
---
> * Perform the first step of verlet integration.
5,11c5,10
< extern "C" __global__ void integrateVerletPart1(int numAtoms, int paddedNumAtoms, const mixed2* __restrict__ dt, const real4* __restrict__ posq,
< const real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta) {
< const mixed2 stepSize = dt[0];
< const mixed dtPos = stepSize.y;
< const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
< const mixed scale = dtVel/(mixed) 0x100000000;
< for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
---
> __kernel void integrateVerletPart1(int numAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq, __global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta) {
> mixed2 stepSize = dt[0];
> mixed dtPos = stepSize.y;
> mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
> int index = get_global_id(0);
> while (index < numAtoms) {
17c16
< mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
---
> mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
21,26c20,23
< velocity.x += scale*force[index]*velocity.w;
< velocity.y += scale*force[index+paddedNumAtoms]*velocity.w;
< velocity.z += scale*force[index+paddedNumAtoms*2]*velocity.w;
< pos.x = velocity.x*dtPos;
< pos.y = velocity.y*dtPos;
< pos.z = velocity.z*dtPos;
---
> velocity.x += force[index].x*dtVel*velocity.w;
> velocity.y += force[index].y*dtVel*velocity.w;
> velocity.z += force[index].z*dtVel*velocity.w;
> pos.xyz = velocity.xyz*dtPos;
29a27
> index += get_global_size(0);
34c32
< * Perform the second step of Verlet integration.
---
> * Perform the second step of verlet integration.
37,38c35
< extern "C" __global__ void integrateVerletPart2(int numAtoms, mixed2* __restrict__ dt, real4* __restrict__ posq,
< real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const mixed4* __restrict__ posDelta) {
---
> __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
40c37
< #if __CUDA_ARCH__ >= 130
---
> #ifdef SUPPORTS_DOUBLE_PRECISION
46,47c43
< int index = blockIdx.x*blockDim.x+threadIdx.x;
< if (index == 0)
---
> if (get_global_id(0) == 0)
49c45,47
< for (; index < numAtoms; index += blockDim.x*gridDim.x) {
---
> barrier(CLK_LOCAL_MEM_FENCE);
> int index = get_global_id(0);
> while (index < numAtoms) {
55c53
< mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
---
> mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
60,64c58,60
< pos.x += delta.x;
< pos.y += delta.y;
< pos.z += delta.z;
< #if __CUDA_ARCH__ >= 130
< velocity = make_mixed4((mixed) (delta.x*oneOverDt), (mixed) (delta.y*oneOverDt), (mixed) (delta.z*oneOverDt), velocity.w);
---
> pos.xyz += delta.xyz;
> #ifdef SUPPORTS_DOUBLE_PRECISION
> velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz;
66c62
< velocity = make_mixed4((mixed) (delta.x*oneOverDt+delta.x*correction), (mixed) (delta.y*oneOverDt+delta.y*correction), (mixed) (delta.z*oneOverDt+delta.z*correction), velocity.w);
---
> velocity.xyz = delta.xyz*oneOverDt + delta.xyz*correction;
69,70c65,66
< posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
< posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
---
> posq[index] = convert_real4(pos);
> posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
75a72
> index += get_global_size(0);
83c80
< extern "C" __global__ void selectVerletStepSize(int numAtoms, int paddedNumAtoms, mixed maxStepSize, mixed errorTol, mixed2* __restrict__ dt, const mixed4* __restrict__ velm, const long long* __restrict__ force) {
---
> __kernel void selectVerletStepSize(int numAtoms, mixed maxStepSize, mixed errorTol, __global mixed2* restrict dt, __global const mixed4* restrict velm, __global const real4* restrict force, __local mixed* restrict error) {
86,90c83,86
< extern __shared__ mixed error[];
< mixed err = 0.0f;
< const mixed scale = RECIP((mixed) 0x100000000);
< for (int index = threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
< mixed3 f = make_mixed3(scale*force[index], scale*force[index+paddedNumAtoms], scale*force[index+paddedNumAtoms*2]);
---
> mixed err = 0;
> int index = get_local_id(0);
> while (index < numAtoms) {
> real4 f = force[index];
92a89
> index += get_global_size(0);
94,95c91,92
< error[threadIdx.x] = err;
< __syncthreads();
---
> error[get_local_id(0)] = err;
> barrier(CLK_LOCAL_MEM_FENCE);
99,102c96,99
< for (unsigned int offset = 1; offset < blockDim.x; offset *= 2) {
< if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
< error[threadIdx.x] += error[threadIdx.x+offset];
< __syncthreads();
---
> for (unsigned int offset = 1; offset < get_local_size(0); offset *= 2) {
> if (get_local_id(0)+offset < get_local_size(0) && (get_local_id(0)&(2*offset-1)) == 0)
> error[get_local_id(0)] += error[get_local_id(0)+offset];
> barrier(CLK_LOCAL_MEM_FENCE);
104,106c101,103
< if (threadIdx.x == 0) {
< mixed totalError = SQRT(error[0]/(numAtoms*3));
< mixed newStepSize = SQRT(errorTol/totalError);
---
> if (get_local_id(0) == 0) {
> mixed totalError = sqrt(error[0]/(numAtoms*3));
> mixed newStepSize = sqrt(errorTol/totalError);
platforms/opencl/src/kernels/noseHooverChain.cl
0 → 100644
View file @
98f73120
//#include
<initializer_list>
__kernel
void
propagateNoseHooverChain
(
__global
mixed2*
restrict
chainData,
__global
const
mixed2
*
restrict
energySum,
__global
mixed2*
restrict
scaleFactor,
__global
mixed*
restrict
chainMasses,
__global
mixed*
restrict
chainForces,
int
chainType,
int
chainLength,
int
numMTS,
int
numDOFs,
float
timeStep,
mixed
kT,
float
frequency
)
{
const
mixed
kineticEnergy
=
chainType
==
0
?
energySum[0].x
:
energySum[0].y
;
mixed
scale
=
1
;
if
(
kineticEnergy
<
1e-8
)
return
;
for
(
int
bead
=
0
; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
chainMasses[0]
*=
numDOFs
;
mixed
KE2
=
2.0f
*
kineticEnergy
;
mixed
timeOverMTS
=
timeStep
/
numMTS
;
chainForces[0]
=
(
KE2
-
numDOFs
*
kT
)
/
chainMasses[0]
;
for
(
int
bead
=
0
; bead < chainLength - 1; ++bead) {
chainForces[bead
+
1]
=
(
chainMasses[bead]
*
chainData[bead].y
*
chainData[bead].y
-
kT
)
/
chainMasses[bead
+
1]
;
}
for
(
int
mts
=
0
; mts < numMTS; ++mts) {
BEGIN_YS_LOOP
mixed
wdt
=
ys
*
timeOverMTS
;
chainData[chainLength-1].y
+=
0.25f
*
wdt
*
chainForces[chainLength-1]
;
for
(
int
bead
=
chainLength
-
2
; bead >= 0; --bead) {
mixed
aa
=
EXP
(
-0.125f
*
wdt
*
chainData[bead
+
1].y
)
;
chainData[bead].y
=
aa
*
(
chainData[bead].y
*
aa
+
0.25f
*
wdt
*
chainForces[bead]
)
;
}
//
update
particle
velocities
mixed
aa
=
EXP
(
-0.5f
*
wdt
*
chainData[0].y
)
;
scale
*=
aa
;
//
update
the
thermostat
positions
for
(
int
bead
=
0
; bead < chainLength; ++bead) {
chainData[bead].x
+=
0.5f
*
chainData[bead].y
*
wdt
;
}
//
update
the
forces
chainForces[0]
=
(
scale
*
scale
*
KE2
-
numDOFs
*
kT
)
/
chainMasses[0]
;
//
update
thermostat
velocities
for
(
int
bead
=
0
; bead < chainLength - 1; ++bead) {
mixed
aa
=
EXP
(
-0.125f
*
wdt
*
chainData[bead
+
1].y
)
;
chainData[bead].y
=
aa
*
(
aa
*
chainData[bead].y
+
0.25f
*
wdt
*
chainForces[bead]
)
;
chainForces[bead
+
1]
=
(
chainMasses[bead]
*
chainData[bead].y
*
chainData[bead].y
-
kT
)
/
chainMasses[bead
+
1]
;
}
chainData[chainLength-1].y
+=
0.25f
*
wdt
*
chainForces[chainLength-1]
;
END_YS_LOOP
}
//
MTS
loop
if
(
chainType
==
0
)
{
scaleFactor[0].x
=
scale
;
}
else
{
scaleFactor[0].y
=
scale
;
}
}
/**
*
Compute
total
(
potential
+
kinetic
)
energy
of
the
Nose-Hoover
beads
*/
__kernel
void
computeHeatBathEnergy
(
__global
mixed*
restrict
heatBathEnergy,
int
chainLength,
int
numDOFs,
mixed
kT,
float
frequency,
__global
const
mixed2*
restrict
chainData
)
{
//
Note
that
this
is
always
incremented
; make sure it's zeroed properly before the first call
for
(
int
i
=
0
; i < chainLength; ++i) {
mixed
prefac
=
i
?
1
:
numDOFs
;
mixed
mass
=
prefac
*
kT
/
(
frequency
*
frequency
)
;
mixed
velocity
=
chainData[i].y
;
//
The
kinetic
energy
of
this
bead
heatBathEnergy[0]
+=
0.5f
*
mass
*
velocity
*
velocity
;
//
The
potential
energy
of
this
bead
mixed
position
=
chainData[i].x
;
heatBathEnergy[0]
+=
prefac
*
kT
*
position
;
}
}
__kernel
void
computeAtomsKineticEnergy
(
__global
mixed2
*
restrict
energyBuffer,
int
numAtoms,
__global
const
mixed4*
restrict
velm,
__global
const
int
*restrict
atoms
)
{
mixed2
energy
=
(
mixed2
)
(
0
,
0
)
;
//energy
=
1
; return;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
int
atom
=
atoms[index]
;
mixed4
v
=
velm[atom]
;
mixed
mass
=
v.w
==
0
?
0
:
1
/
v.w
;
energy.x
+=
0.5f
*
mass
*
(
v.x*v.x
+
v.y*v.y
+
v.z*v.z
)
;
index
+=
get_global_size
(
0
)
;
}
energyBuffer[get_global_id
(
0
)
]
=
energy
;
}
__kernel
void
computePairsKineticEnergy
(
__global
mixed2
*
restrict
energyBuffer,
int
numPairs,
__global
const
mixed4*
restrict
velm,
__global
const
int2
*restrict
pairs
)
{
mixed2
energy
=
(
mixed2
)
(
0
,
0
)
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numPairs
)
{
int2
pair
=
pairs[index]
;
int
atom1
=
pair.x
;
int
atom2
=
pair.y
;
mixed4
v1
=
velm[atom1]
;
mixed4
v2
=
velm[atom2]
;
mixed
m1
=
v1.w
==
0
?
0
:
1
/
v1.w
;
mixed
m2
=
v2.w
==
0
?
0
:
1
/
v2.w
;
mixed4
cv
;
cv.x
=
(
m1*v1.x
+
m2*v2.x
)
/
(
m1
+
m2
)
;
cv.y
=
(
m1*v1.y
+
m2*v2.y
)
/
(
m1
+
m2
)
;
cv.z
=
(
m1*v1.z
+
m2*v2.z
)
/
(
m1
+
m2
)
;
mixed4
rv
;
rv.x
=
v2.x
-
v1.x
;
rv.y
=
v2.y
-
v1.y
;
rv.z
=
v2.z
-
v1.z
;
energy.x
+=
0.5f
*
(
m1
+
m2
)
*
(
cv.x*cv.x
+
cv.y*cv.y
+
cv.z*cv.z
)
;
energy.y
+=
0.5f
*
(
m1
*
m2
/
(
m1
+
m2
))
*
(
rv.x*rv.x
+
rv.y*rv.y
+
rv.z*rv.z
)
;
index
+=
get_global_size
(
0
)
;
}
//
The
atoms
version
of
this
has
been
called
already,
so
accumulate
instead
of
assigning
here
energyBuffer[get_global_id
(
0
)
].xy
+=
energy.xy
;
}
__kernel
void
scaleAtomsVelocities
(
__global
mixed2*
restrict
scaleFactor,
int
numAtoms,
__global
mixed4*
restrict
velm,
__global
const
int
*restrict
atoms
)
{
const
mixed
scale
=
scaleFactor[0].x
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
int
atom
=
atoms[index]
;
velm[atom].x
*=
scale
;
velm[atom].y
*=
scale
;
velm[atom].z
*=
scale
;
index
+=
get_global_size
(
0
)
;
}
}
__kernel
void
scalePairsVelocities
(
__global
mixed2
*
restrict
scaleFactor,
int
numPairs,
__global
mixed4*
restrict
velm,
__global
const
int2
*restrict
pairs
)
{
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numPairs
)
{
int
atom1
=
pairs[index].x
;
int
atom2
=
pairs[index].y
;
mixed
m1
=
velm[atom1].w
==
0
?
0
:
1
/
velm[atom1].w
;
mixed
m2
=
velm[atom2].w
==
0
?
0
:
1
/
velm[atom2].w
;
mixed4
cv
;
cv.xyz
=
(
m1*velm[atom1].xyz
+
m2*velm[atom2].xyz
)
/
(
m1
+
m2
)
;
mixed4
rv
;
rv.xyz
=
velm[atom2].xyz
-
velm[atom1].xyz
;
velm[atom1].x
=
scaleFactor[0].x
*
cv.x
-
scaleFactor[0].y
*
rv.x
*
m2
/
(
m1
+
m2
)
;
velm[atom1].y
=
scaleFactor[0].x
*
cv.y
-
scaleFactor[0].y
*
rv.y
*
m2
/
(
m1
+
m2
)
;
velm[atom1].z
=
scaleFactor[0].x
*
cv.z
-
scaleFactor[0].y
*
rv.z
*
m2
/
(
m1
+
m2
)
;
velm[atom2].x
=
scaleFactor[0].x
*
cv.x
+
scaleFactor[0].y
*
rv.x
*
m1
/
(
m1
+
m2
)
;
velm[atom2].y
=
scaleFactor[0].x
*
cv.y
+
scaleFactor[0].y
*
rv.y
*
m1
/
(
m1
+
m2
)
;
velm[atom2].z
=
scaleFactor[0].x
*
cv.z
+
scaleFactor[0].y
*
rv.z
*
m1
/
(
m1
+
m2
)
;
index
+=
get_global_size
(
0
)
;
}
}
/**
*
Sum
the
energy
buffer
containing
a
pair
of
energies
stored
as
mixed2.
This
is
copied
from
utilities.cu
with
small
modifications
*/
__kernel
void
reduceEnergyPair
(
__global
const
mixed2*
restrict
energyBuffer,
__global
mixed2*
restrict
result,
int
bufferSize,
int
workGroupSize,
__local
mixed2*
restrict
tempBuffer
)
{
const
unsigned
int
thread
=
get_local_id
(
0
)
;
mixed2
sum
=
(
mixed2
)
(
0
,
0
)
;
for
(
unsigned
int
index
=
thread
; index < bufferSize; index += get_local_size(0)) {
sum.xy
+=
energyBuffer[index].xy
;
}
tempBuffer[thread].xy
=
sum.xy
;
for
(
int
i
=
1
; i < workGroupSize; i *= 2) {
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
thread%
(
i*2
)
==
0
&&
thread+i
<
workGroupSize
)
{
tempBuffer[thread].xy
+=
tempBuffer[thread+i].xy
;
}
}
if
(
thread
==
0
)
{
*result
=
tempBuffer[0]
;
}
}
platforms/opencl/src/kernels/velocityVerlet.cl
0 → 100644
View file @
98f73120
/**
*
Perform
the
first
step
of
Velocity
Verlet
integration.
*/
__kernel
void
integrateVelocityVerletPart1
(
int
numAtoms,
int
paddedNumAtoms,
__global
const
mixed2*
restrict
dt,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
mixed4*
restrict
posDelta
)
{
const
mixed2
stepSize
=
dt[0]
;
const
mixed
dtPos
=
stepSize.y
;
const
mixed
dtVel
=
0.5f*
(
stepSize.x+stepSize.y
)
;
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
mixed4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
{
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
velocity.x
+=
0.5f
*
dtVel*force[index].x*velocity.w
;
velocity.y
+=
0.5f
*
dtVel*force[index].y*velocity.w
;
velocity.z
+=
0.5f
*
dtVel*force[index].z*velocity.w
;
pos.x
=
velocity.x*dtPos
;
pos.y
=
velocity.y*dtPos
;
pos.z
=
velocity.z*dtPos
;
posDelta[index]
=
pos
;
velm[index]
=
velocity
;
}
index
+=
get_global_size
(
0
)
;
}
}
/**
*
Perform
the
second
step
of
Velocity
Verlet
integration.
*/
__kernel
void
integrateVelocityVerletPart2
(
int
numAtoms,
__global
mixed2*
restrict
dt,
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
velm,
__global
const
mixed4*
restrict
posDelta
)
{
mixed2
stepSize
=
dt[0]
;
int
index
=
get_global_id
(
0
)
;
if
(
index
==
0
)
dt[0].x
=
stepSize.y
;
while
(
index
<
numAtoms
)
{
mixed4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
{
#
ifdef
USE_MIXED_PRECISION
real4
pos1
=
posq[index]
;
real4
pos2
=
posqCorrection[index]
;
mixed4
pos
=
(
mixed4
)
(
pos1.x+
(
mixed
)
pos2.x,
pos1.y+
(
mixed
)
pos2.y,
pos1.z+
(
mixed
)
pos2.z,
pos1.w
)
;
#
else
real4
pos
=
posq[index]
;
#
endif
mixed4
delta
=
posDelta[index]
;
pos.xyz
+=
delta.xyz
;
#
ifdef
USE_MIXED_PRECISION
posq[index]
=
(
real4
)
((
real
)
pos.x,
(
real
)
pos.y,
(
real
)
pos.z,
(
real
)
pos.w
)
;
posqCorrection[index]
=
(
real4
)
(
pos.x-
(
real
)
pos.x,
pos.y-
(
real
)
pos.y,
pos.z-
(
real
)
pos.z,
0
)
;
#
else
posq[index]
=
pos
;
#
endif
}
index
+=
get_global_size
(
0
)
;
}
}
/**
*
Perform
the
third
step
of
Velocity
Verlet
integration.
*/
__kernel
void
integrateVelocityVerletPart3
(
int
numAtoms,
int
paddedNumAtoms,
__global
mixed2*
restrict
dt,
__global
real4*
restrict
posq,
__global
real4*
restrict
posqCorrection,
__global
mixed4*
restrict
velm,
__global
const
real4*
restrict
force,
__global
const
mixed4*
restrict
posDelta
)
{
mixed2
stepSize
=
dt[0]
;
#
ifndef
SUPPORTS_DOUBLE_PRECISION
double
oneOverDt
=
1.0/stepSize.y
;
#
else
float
oneOverDt
=
1.0f/stepSize.y
;
float
correction
=
(
1.0f-oneOverDt*stepSize.y
)
/stepSize.y
;
#
endif
const
mixed
dtVel
=
0.5f*
(
stepSize.x+stepSize.y
)
;
int
index
=
get_global_id
(
0
)
;
if
(
index
==
0
)
dt[0].x
=
stepSize.y
;
while
(
index
<
numAtoms
)
{
mixed4
velocity
=
velm[index]
;
if
(
velocity.w
!=
0.0
)
{
mixed4
deltaXconstrained
=
posDelta[index]
;
velocity.x
+=
0.5f
*
dtVel*force[index].x*velocity.w
+
(
deltaXconstrained.x
-
velocity.x*stepSize.y
)
*oneOverDt
;
velocity.y
+=
0.5f
*
dtVel*force[index].y*velocity.w
+
(
deltaXconstrained.y
-
velocity.y*stepSize.y
)
*oneOverDt
;
velocity.z
+=
0.5f
*
dtVel*force[index].z*velocity.w
+
(
deltaXconstrained.z
-
velocity.z*stepSize.y
)
*oneOverDt
;
#
ifdef
SUPPORTS_DOUBLE_PRECISION
velocity.x
+=
(
deltaXconstrained.x
-
velocity.x*stepSize.y
)
*correction
;
velocity.y
+=
(
deltaXconstrained.y
-
velocity.y*stepSize.y
)
*correction
;
velocity.z
+=
(
deltaXconstrained.z
-
velocity.z*stepSize.y
)
*correction
;
#
endif
velm[index]
=
velocity
;
}
index
+=
get_global_size
(
0
)
;
}
}
platforms/opencl/tests/TestOpenCLNoseHooverIntegrator.cpp
0 → 100644
View file @
98f73120
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Andreas Krämer and Andrew C. Simmmonett *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestNoseHooverIntegrator.h"
void
runPlatformTests
()
{
}
platforms/opencl/tests/TestOpenCLNoseHooverThermostat.cpp
0 → 100644
View file @
98f73120
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Andreas Krämer and Andrew C. Simmonett *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestNoseHooverThermostat.h"
void
runPlatformTests
()
{
}
plugins/drude/platforms/opencl/tests/CMakeLists.txt
View file @
98f73120
...
...
@@ -5,6 +5,7 @@
ENABLE_TESTING
()
INCLUDE_DIRECTORIES
(
${
OPENCL_INCLUDE_DIR
}
)
INCLUDE_DIRECTORIES
(
${
OPENMM_DIR
}
/plugins/drude/tests
)
# Automatically create tests using files named "Test*.cpp"
FILE
(
GLOB TEST_PROGS
"*Test*.cpp"
)
...
...
plugins/drude/platforms/opencl/tests/TestOpenCLDrudeNoseHoover.cpp
0 → 100644
View file @
98f73120
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
//#include "ReferenceTests.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/DrudeForce.h"
#include "OpenCLPlatform.h"
#include "SimTKOpenMMUtilities.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
extern
"C"
OPENMM_EXPORT
void
registerDrudeOpenCLKernelFactories
();
Platform
&
initializePlatform
(
int
argc
,
char
*
argv
[])
{
registerDrudeOpenCLKernelFactories
();
if
(
argc
>
1
)
Platform
::
getPlatformByName
(
"OpenCL"
).
setPropertyDefaultValue
(
"Precision"
,
std
::
string
(
argv
[
1
]));
return
Platform
::
getPlatformByName
(
"OpenCL"
);
}
#include "TestDrudeNoseHoover.h"
void
runPlatformTests
()
{
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment