Unverified Commit 98f73120 authored by Andy Simmonett's avatar Andy Simmonett
Browse files

Implement NHC for OpenCL platform

parent 80e1b66f
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "windowsExportOpenCL.h" #include "windowsExportOpenCL.h"
#include <iosfwd> #include <iosfwd>
#include <map>
namespace OpenMM { namespace OpenMM {
...@@ -119,6 +120,12 @@ public: ...@@ -119,6 +120,12 @@ public:
* @param timeShift the amount by which to shift the velocities in time * @param timeShift the amount by which to shift the velocities in time
*/ */
double computeKineticEnergy(double timeShift); double computeKineticEnergy(double timeShift);
/**
* Get the data structure that holds the state of all Nose-Hoover chains
*
* @return vector of chain states
*/
std::map<int, OpenCLArray>& getNoseHooverChainState();
private: private:
void applyConstraints(bool constrainVelocities, double tol); void applyConstraints(bool constrainVelocities, double tol);
OpenCLContext& context; OpenCLContext& context;
...@@ -166,6 +173,7 @@ private: ...@@ -166,6 +173,7 @@ private:
mm_double2 lastStepSize; mm_double2 lastStepSize;
struct ShakeCluster; struct ShakeCluster;
struct ConstraintOrderer; struct ConstraintOrderer;
std::map<int, OpenCLArray> noseHooverChainState;
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/internal/CompiledExpressionSet.h" #include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h" #include "openmm/internal/CustomIntegratorUtilities.h"
#include "openmm/internal/NoseHooverChain.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h" #include "lepton/ExpressionProgram.h"
#include "openmm/System.h" #include "openmm/System.h"
...@@ -1346,6 +1347,43 @@ private: ...@@ -1346,6 +1347,43 @@ private:
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
/*
* This kernel is invoked by NoseHooverIntegrator to take one time step.
*/
class OpenCLIntegrateVelocityVerletStepKernel : public IntegrateVelocityVerletStepKernel {
public:
OpenCLIntegrateVelocityVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateVelocityVerletStepKernel(name, platform), cl(cl) { }
~OpenCLIntegrateVelocityVerletStepKernel() {}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the NoseHooverIntegrator this kernel will be used for
*/
void initialize(const System& system, const NoseHooverIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the VerletIntegrator this kernel is being used for
* @param forcesAreValid a reference to the parent integrator's boolean for keeping
* track of the validity of the current forces.
*/
void execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the NoseHooverIntegrator this kernel is being used for
*/
double computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator);
private:
OpenCLContext& cl;
cl::Kernel kernel1, kernel2, kernel3;
};
/** /**
* This kernel is invoked by LangevinIntegrator to take one time step. * This kernel is invoked by LangevinIntegrator to take one time step.
*/ */
...@@ -1667,6 +1705,72 @@ private: ...@@ -1667,6 +1705,72 @@ private:
cl::Kernel kernel; cl::Kernel kernel;
}; };
/**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities.
*/
class OpenCLNoseHooverChainKernel : public NoseHooverChainKernel {
public:
OpenCLNoseHooverChainKernel(std::string name, const Platform& platform, OpenCLContext& cl) : NoseHooverChainKernel(name, platform), cl(cl) {
}
~OpenCLNoseHooverChainKernel() {}
/**
* Initialize the kernel.
*/
virtual void initialize();
/**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the object describing the chain to be propagated.
* @param kineticEnergies the {absolute, relative} kineticEnergy of the particles being thermostated by this chain.
* @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/
virtual std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/**
* Execute the kernal that computes the total (kinetic + potential) heat bath energy.
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy.
*/
virtual double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
/**
* Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param downloadValue whether the computed value should be downloaded and returned.
*
*/
virtual std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain
*
* @param context the context in which to execute this kernel
* @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/
virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private:
int sumWorkGroupSize;
OpenCLContext& cl;
OpenCLArray scaleFactorBuffer, kineticEnergyBuffer, chainMasses, chainForces, heatBathEnergy;
std::map<int, OpenCLArray> atomlists, pairlists;
std::map<int, cl::Kernel> propagateKernels;
cl::Kernel reduceEnergyKernel;
cl::Kernel computeHeatBathEnergyKernel;
cl::Kernel computeAtomsKineticEnergyKernel;
cl::Kernel computePairsKineticEnergyKernel;
cl::Kernel scaleAtomsVelocitiesKernel;
cl::Kernel scalePairsVelocitiesKernel;
};
/** /**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume * This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/ */
......
...@@ -848,6 +848,24 @@ int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) { ...@@ -848,6 +848,24 @@ int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) {
} }
void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) { void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) {
size_t numChains = noseHooverChainState.size();
bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
stream.write((char*) &numChains, sizeof(size_t));
for (auto &chainState: noseHooverChainState){
int chainID = chainState.first;
size_t chainLength = chainState.second.getSize();
stream.write((char*) &chainID, sizeof(int));
stream.write((char*) &chainLength, sizeof(size_t));
if (useDouble) {
vector<mm_double2> stateVec;
chainState.second.download(stateVec);
stream.write((char*) stateVec.data(), sizeof(mm_double2)*chainLength);
} else {
vector<mm_float2> stateVec;
chainState.second.download(stateVec);
stream.write((char*) stateVec.data(), sizeof(mm_float2)*chainLength);
}
}
if (!random.isInitialized()) if (!random.isInitialized())
return; return;
stream.write((char*) &randomPos, sizeof(int)); stream.write((char*) &randomPos, sizeof(int));
...@@ -860,6 +878,28 @@ void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) { ...@@ -860,6 +878,28 @@ void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) {
} }
void OpenCLIntegrationUtilities::loadCheckpoint(istream& stream) { void OpenCLIntegrationUtilities::loadCheckpoint(istream& stream) {
size_t numChains, chainLength;
bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
stream.read((char*) &numChains, sizeof(size_t));
noseHooverChainState.clear();
for (size_t i=0; i<numChains; i++){
int chainID;
stream.read((char*) &chainID, sizeof(int));
stream.read((char*) &chainLength, sizeof(size_t));
if (useDouble) {
noseHooverChainState[chainID] = OpenCLArray();
noseHooverChainState[chainID].initialize<mm_double2>(context, chainLength, "chainState" + std::to_string(chainID));
std::vector<mm_double2> stateVec(chainLength);
stream.read((char*) &stateVec[0], sizeof(mm_double2)*chainLength);
noseHooverChainState[chainID].upload(stateVec);
} else {
noseHooverChainState[chainID] = OpenCLArray();
noseHooverChainState[chainID].initialize<mm_float2>(context, chainLength, "chainState" + std::to_string(chainID));
std::vector<mm_float2> stateVec(chainLength);
stream.read((char*) &stateVec[0], sizeof(mm_float2)*chainLength);
noseHooverChainState[chainID].upload(stateVec);
}
}
if (!random.isInitialized()) if (!random.isInitialized())
return; return;
stream.read((char*) &randomPos, sizeof(int)); stream.read((char*) &randomPos, sizeof(int));
...@@ -918,3 +958,7 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) { ...@@ -918,3 +958,7 @@ double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
posDelta.copyTo(context.getVelm()); posDelta.copyTo(context.getVelm());
return 0.5*energy; return 0.5*energy;
} }
std::map<int, OpenCLArray>& OpenCLIntegrationUtilities::getNoseHooverChainState(){
return noseHooverChainState;
};
...@@ -128,6 +128,10 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo ...@@ -128,6 +128,10 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLIntegrateCustomStepKernel(name, platform, cl); return new OpenCLIntegrateCustomStepKernel(name, platform, cl);
if (name == ApplyAndersenThermostatKernel::Name()) if (name == ApplyAndersenThermostatKernel::Name())
return new OpenCLApplyAndersenThermostatKernel(name, platform, cl); return new OpenCLApplyAndersenThermostatKernel(name, platform, cl);
if (name == NoseHooverChainKernel::Name())
return new OpenCLNoseHooverChainKernel(name, platform, cl);
if (name == IntegrateVelocityVerletStepKernel::Name())
return new OpenCLIntegrateVelocityVerletStepKernel(name, platform, cl);
if (name == ApplyMonteCarloBarostatKernel::Name()) if (name == ApplyMonteCarloBarostatKernel::Name())
return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl); return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl);
if (name == RemoveCMMotionKernel::Name()) if (name == RemoveCMMotionKernel::Name())
......
This diff is collapsed.
...@@ -87,12 +87,14 @@ OpenCLPlatform::OpenCLPlatform() { ...@@ -87,12 +87,14 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory); registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory); registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVelocityVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory); registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory); registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory); registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateCustomStepKernel::Name(), factory); registerKernelFactory(IntegrateCustomStepKernel::Name(), factory);
registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory); registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
registerKernelFactory(NoseHooverChainKernel::Name(), factory);
registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory); registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory); registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
platformProperties.push_back(OpenCLDeviceIndex()); platformProperties.push_back(OpenCLDeviceIndex());
......
2c2
< * Perform the first step of Verlet integration.
---
> * Perform the first step of verlet integration.
5,11c5,10
< extern "C" __global__ void integrateVerletPart1(int numAtoms, int paddedNumAtoms, const mixed2* __restrict__ dt, const real4* __restrict__ posq,
< const real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta) {
< const mixed2 stepSize = dt[0];
< const mixed dtPos = stepSize.y;
< const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
< const mixed scale = dtVel/(mixed) 0x100000000;
< for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
---
> __kernel void integrateVerletPart1(int numAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq, __global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta) {
> mixed2 stepSize = dt[0];
> mixed dtPos = stepSize.y;
> mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
> int index = get_global_id(0);
> while (index < numAtoms) {
17c16
< mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
---
> mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
21,26c20,23
< velocity.x += scale*force[index]*velocity.w;
< velocity.y += scale*force[index+paddedNumAtoms]*velocity.w;
< velocity.z += scale*force[index+paddedNumAtoms*2]*velocity.w;
< pos.x = velocity.x*dtPos;
< pos.y = velocity.y*dtPos;
< pos.z = velocity.z*dtPos;
---
> velocity.x += force[index].x*dtVel*velocity.w;
> velocity.y += force[index].y*dtVel*velocity.w;
> velocity.z += force[index].z*dtVel*velocity.w;
> pos.xyz = velocity.xyz*dtPos;
29a27
> index += get_global_size(0);
34c32
< * Perform the second step of Verlet integration.
---
> * Perform the second step of verlet integration.
37,38c35
< extern "C" __global__ void integrateVerletPart2(int numAtoms, mixed2* __restrict__ dt, real4* __restrict__ posq,
< real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const mixed4* __restrict__ posDelta) {
---
> __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
40c37
< #if __CUDA_ARCH__ >= 130
---
> #ifdef SUPPORTS_DOUBLE_PRECISION
46,47c43
< int index = blockIdx.x*blockDim.x+threadIdx.x;
< if (index == 0)
---
> if (get_global_id(0) == 0)
49c45,47
< for (; index < numAtoms; index += blockDim.x*gridDim.x) {
---
> barrier(CLK_LOCAL_MEM_FENCE);
> int index = get_global_id(0);
> while (index < numAtoms) {
55c53
< mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
---
> mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
60,64c58,60
< pos.x += delta.x;
< pos.y += delta.y;
< pos.z += delta.z;
< #if __CUDA_ARCH__ >= 130
< velocity = make_mixed4((mixed) (delta.x*oneOverDt), (mixed) (delta.y*oneOverDt), (mixed) (delta.z*oneOverDt), velocity.w);
---
> pos.xyz += delta.xyz;
> #ifdef SUPPORTS_DOUBLE_PRECISION
> velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz;
66c62
< velocity = make_mixed4((mixed) (delta.x*oneOverDt+delta.x*correction), (mixed) (delta.y*oneOverDt+delta.y*correction), (mixed) (delta.z*oneOverDt+delta.z*correction), velocity.w);
---
> velocity.xyz = delta.xyz*oneOverDt + delta.xyz*correction;
69,70c65,66
< posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
< posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
---
> posq[index] = convert_real4(pos);
> posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
75a72
> index += get_global_size(0);
83c80
< extern "C" __global__ void selectVerletStepSize(int numAtoms, int paddedNumAtoms, mixed maxStepSize, mixed errorTol, mixed2* __restrict__ dt, const mixed4* __restrict__ velm, const long long* __restrict__ force) {
---
> __kernel void selectVerletStepSize(int numAtoms, mixed maxStepSize, mixed errorTol, __global mixed2* restrict dt, __global const mixed4* restrict velm, __global const real4* restrict force, __local mixed* restrict error) {
86,90c83,86
< extern __shared__ mixed error[];
< mixed err = 0.0f;
< const mixed scale = RECIP((mixed) 0x100000000);
< for (int index = threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
< mixed3 f = make_mixed3(scale*force[index], scale*force[index+paddedNumAtoms], scale*force[index+paddedNumAtoms*2]);
---
> mixed err = 0;
> int index = get_local_id(0);
> while (index < numAtoms) {
> real4 f = force[index];
92a89
> index += get_global_size(0);
94,95c91,92
< error[threadIdx.x] = err;
< __syncthreads();
---
> error[get_local_id(0)] = err;
> barrier(CLK_LOCAL_MEM_FENCE);
99,102c96,99
< for (unsigned int offset = 1; offset < blockDim.x; offset *= 2) {
< if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
< error[threadIdx.x] += error[threadIdx.x+offset];
< __syncthreads();
---
> for (unsigned int offset = 1; offset < get_local_size(0); offset *= 2) {
> if (get_local_id(0)+offset < get_local_size(0) && (get_local_id(0)&(2*offset-1)) == 0)
> error[get_local_id(0)] += error[get_local_id(0)+offset];
> barrier(CLK_LOCAL_MEM_FENCE);
104,106c101,103
< if (threadIdx.x == 0) {
< mixed totalError = SQRT(error[0]/(numAtoms*3));
< mixed newStepSize = SQRT(errorTol/totalError);
---
> if (get_local_id(0) == 0) {
> mixed totalError = sqrt(error[0]/(numAtoms*3));
> mixed newStepSize = sqrt(errorTol/totalError);
//#include <initializer_list>
__kernel void propagateNoseHooverChain(__global mixed2* restrict chainData, __global const mixed2 * restrict energySum, __global mixed2* restrict scaleFactor,
__global mixed* restrict chainMasses, __global mixed* restrict chainForces,
int chainType, int chainLength, int numMTS, int numDOFs, float timeStep,
mixed kT, float frequency){
const mixed kineticEnergy = chainType == 0 ? energySum[0].x : energySum[0].y;
mixed scale = 1;
if(kineticEnergy < 1e-8) return;
for (int bead = 0; bead < chainLength; ++bead) chainMasses[bead] = kT / (frequency * frequency);
chainMasses[0] *= numDOFs;
mixed KE2 = 2.0f * kineticEnergy;
mixed timeOverMTS = timeStep / numMTS;
chainForces[0] = (KE2 - numDOFs * kT) / chainMasses[0];
for (int bead = 0; bead < chainLength - 1; ++bead) {
chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
}
for (int mts = 0; mts < numMTS; ++mts) {
BEGIN_YS_LOOP
mixed wdt = ys * timeOverMTS;
chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
for (int bead = chainLength - 2; bead >= 0; --bead) {
mixed aa = EXP(-0.125f * wdt * chainData[bead + 1].y);
chainData[bead].y = aa * (chainData[bead].y * aa + 0.25f * wdt * chainForces[bead]);
}
// update particle velocities
mixed aa = EXP(-0.5f * wdt * chainData[0].y);
scale *= aa;
// update the thermostat positions
for (int bead = 0; bead < chainLength; ++bead) {
chainData[bead].x += 0.5f * chainData[bead].y * wdt;
}
// update the forces
chainForces[0] = (scale * scale * KE2 - numDOFs * kT) / chainMasses[0];
// update thermostat velocities
for (int bead = 0; bead < chainLength - 1; ++bead) {
mixed aa = EXP(-0.125f * wdt * chainData[bead + 1].y);
chainData[bead].y = aa * (aa * chainData[bead].y + 0.25f * wdt * chainForces[bead]);
chainForces[bead + 1] = (chainMasses[bead] * chainData[bead].y * chainData[bead].y - kT) / chainMasses[bead + 1];
}
chainData[chainLength-1].y += 0.25f * wdt * chainForces[chainLength-1];
END_YS_LOOP
} // MTS loop
if (chainType == 0) {
scaleFactor[0].x = scale;
} else {
scaleFactor[0].y = scale;
}
}
/**
* Compute total (potential + kinetic) energy of the Nose-Hoover beads
*/
__kernel void computeHeatBathEnergy(__global mixed* restrict heatBathEnergy, int chainLength, int numDOFs,
mixed kT, float frequency, __global const mixed2* restrict chainData){
// Note that this is always incremented; make sure it's zeroed properly before the first call
for(int i = 0; i < chainLength; ++i) {
mixed prefac = i ? 1 : numDOFs;
mixed mass = prefac * kT / (frequency * frequency);
mixed velocity = chainData[i].y;
// The kinetic energy of this bead
heatBathEnergy[0] += 0.5f * mass * velocity * velocity;
// The potential energy of this bead
mixed position = chainData[i].x;
heatBathEnergy[0] += prefac * kT * position;
}
}
__kernel void computeAtomsKineticEnergy(__global mixed2 * restrict energyBuffer, int numAtoms,
__global const mixed4* restrict velm, __global const int *restrict atoms){
mixed2 energy = (mixed2) (0,0);
//energy = 1; return;
int index = get_global_id(0);
while (index < numAtoms){
int atom = atoms[index];
mixed4 v = velm[atom];
mixed mass = v.w == 0 ? 0 : 1 / v.w;
energy.x += 0.5f * mass * (v.x*v.x + v.y*v.y + v.z*v.z);
index += get_global_size(0);
}
energyBuffer[get_global_id(0)] = energy;
}
__kernel void computePairsKineticEnergy(__global mixed2 * restrict energyBuffer, int numPairs,
__global const mixed4* restrict velm, __global const int2 *restrict pairs){
mixed2 energy = (mixed2) (0,0);
int index = get_global_id(0);
while (index < numPairs){
int2 pair = pairs[index];
int atom1 = pair.x;
int atom2 = pair.y;
mixed4 v1 = velm[atom1];
mixed4 v2 = velm[atom2];
mixed m1 = v1.w == 0 ? 0 : 1 / v1.w;
mixed m2 = v2.w == 0 ? 0 : 1 / v2.w;
mixed4 cv;
cv.x = (m1*v1.x + m2*v2.x) / (m1 + m2);
cv.y = (m1*v1.y + m2*v2.y) / (m1 + m2);
cv.z = (m1*v1.z + m2*v2.z) / (m1 + m2);
mixed4 rv;
rv.x = v2.x - v1.x;
rv.y = v2.y - v1.y;
rv.z = v2.z - v1.z;
energy.x += 0.5f * (m1 + m2) * (cv.x*cv.x + cv.y*cv.y + cv.z*cv.z);
energy.y += 0.5f * (m1 * m2 / (m1 + m2)) * (rv.x*rv.x + rv.y*rv.y + rv.z*rv.z);
index += get_global_size(0);
}
// The atoms version of this has been called already, so accumulate instead of assigning here
energyBuffer[get_global_id(0)].xy += energy.xy;
}
__kernel void scaleAtomsVelocities(__global mixed2* restrict scaleFactor, int numAtoms,
__global mixed4* restrict velm, __global const int *restrict atoms){
const mixed scale = scaleFactor[0].x;
int index = get_global_id(0);
while (index < numAtoms){
int atom = atoms[index];
velm[atom].x *= scale;
velm[atom].y *= scale;
velm[atom].z *= scale;
index += get_global_size(0);
}
}
__kernel void scalePairsVelocities(__global mixed2 * restrict scaleFactor, int numPairs,
__global mixed4* restrict velm, __global const int2 *restrict pairs){
int index = get_global_id(0);
while (index < numPairs){
int atom1 = pairs[index].x;
int atom2 = pairs[index].y;
mixed m1 = velm[atom1].w == 0 ? 0 : 1 / velm[atom1].w;
mixed m2 = velm[atom2].w == 0 ? 0 : 1 / velm[atom2].w;
mixed4 cv;
cv.xyz = (m1*velm[atom1].xyz + m2*velm[atom2].xyz) / (m1 + m2);
mixed4 rv;
rv.xyz = velm[atom2].xyz - velm[atom1].xyz;
velm[atom1].x = scaleFactor[0].x * cv.x - scaleFactor[0].y * rv.x * m2 / (m1 + m2);
velm[atom1].y = scaleFactor[0].x * cv.y - scaleFactor[0].y * rv.y * m2 / (m1 + m2);
velm[atom1].z = scaleFactor[0].x * cv.z - scaleFactor[0].y * rv.z * m2 / (m1 + m2);
velm[atom2].x = scaleFactor[0].x * cv.x + scaleFactor[0].y * rv.x * m1 / (m1 + m2);
velm[atom2].y = scaleFactor[0].x * cv.y + scaleFactor[0].y * rv.y * m1 / (m1 + m2);
velm[atom2].z = scaleFactor[0].x * cv.z + scaleFactor[0].y * rv.z * m1 / (m1 + m2);
index += get_global_size(0);
}
}
/**
* Sum the energy buffer containing a pair of energies stored as mixed2. This is copied from utilities.cu with small modifications
*/
__kernel void reduceEnergyPair(__global const mixed2* restrict energyBuffer, __global mixed2* restrict result, int bufferSize, int workGroupSize, __local mixed2* restrict tempBuffer) {
const unsigned int thread = get_local_id(0);
mixed2 sum = (mixed2) (0,0);
for (unsigned int index = thread; index < bufferSize; index += get_local_size(0)) {
sum.xy += energyBuffer[index].xy;
}
tempBuffer[thread].xy = sum.xy;
for (int i = 1; i < workGroupSize; i *= 2) {
barrier(CLK_LOCAL_MEM_FENCE);
if (thread%(i*2) == 0 && thread+i < workGroupSize) {
tempBuffer[thread].xy += tempBuffer[thread+i].xy;
}
}
if (thread == 0) {
*result = tempBuffer[0];
}
}
/**
* Perform the first step of Velocity Verlet integration.
*/
__kernel void integrateVelocityVerletPart1(int numAtoms, int paddedNumAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq,
__global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta) {
const mixed2 stepSize = dt[0];
const mixed dtPos = stepSize.y;
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
int index = get_global_id(0);
while (index < numAtoms) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
velocity.x += 0.5f * dtVel*force[index].x*velocity.w;
velocity.y += 0.5f * dtVel*force[index].y*velocity.w;
velocity.z += 0.5f * dtVel*force[index].z*velocity.w;
pos.x = velocity.x*dtPos;
pos.y = velocity.y*dtPos;
pos.z = velocity.z*dtPos;
posDelta[index] = pos;
velm[index] = velocity;
}
index += get_global_size(0);
}
}
/**
* Perform the second step of Velocity Verlet integration.
*/
__kernel void integrateVelocityVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq,
__global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
mixed2 stepSize = dt[0];
int index = get_global_id(0);
if (index == 0)
dt[0].x = stepSize.y;
while(index < numAtoms) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
mixed4 delta = posDelta[index];
pos.xyz += delta.xyz;
#ifdef USE_MIXED_PRECISION
posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
}
index += get_global_size(0);
}
}
/**
* Perform the third step of Velocity Verlet integration.
*/
__kernel void integrateVelocityVerletPart3(int numAtoms, int paddedNumAtoms, __global mixed2* restrict dt, __global real4* restrict posq,
__global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed4* restrict posDelta) {
mixed2 stepSize = dt[0];
#ifndef SUPPORTS_DOUBLE_PRECISION
double oneOverDt = 1.0/stepSize.y;
#else
float oneOverDt = 1.0f/stepSize.y;
float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif
const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
int index = get_global_id(0);
if (index == 0)
dt[0].x = stepSize.y;
while(index < numAtoms) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 deltaXconstrained = posDelta[index];
velocity.x += 0.5f * dtVel*force[index].x*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt;
velocity.y += 0.5f * dtVel*force[index].y*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt;
velocity.z += 0.5f * dtVel*force[index].z*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt;
#ifdef SUPPORTS_DOUBLE_PRECISION
velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction;
velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction;
velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction;
#endif
velm[index] = velocity;
}
index += get_global_size(0);
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Andreas Krämer and Andrew C. Simmmonett *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestNoseHooverIntegrator.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Andreas Krämer and Andrew C. Simmonett *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestNoseHooverThermostat.h"
void runPlatformTests() {
}
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
ENABLE_TESTING() ENABLE_TESTING()
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${OPENMM_DIR}/plugins/drude/tests)
# Automatically create tests using files named "Test*.cpp" # Automatically create tests using files named "Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp") FILE(GLOB TEST_PROGS "*Test*.cpp")
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
//#include "ReferenceTests.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/DrudeForce.h"
#include "OpenCLPlatform.h"
#include "SimTKOpenMMUtilities.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
extern "C" OPENMM_EXPORT void registerDrudeOpenCLKernelFactories();
Platform& initializePlatform(int argc, char* argv[]) {
registerDrudeOpenCLKernelFactories();
if (argc > 1) Platform::getPlatformByName("OpenCL").setPropertyDefaultValue("Precision", std::string(argv[1]));
return Platform::getPlatformByName("OpenCL");
}
#include "TestDrudeNoseHoover.h"
void runPlatformTests() { }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment