Commit f2391672 authored by Peter Eastman's avatar Peter Eastman
Browse files

Created OpenCL implementation of CMMotionRemover. Also made a few optimizations.

parent 9b6ddf42
......@@ -66,7 +66,7 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
// return new OpenCLApplyAndersenThermostatKernel(name, platform, cl);
if (name == CalcKineticEnergyKernel::Name())
return new OpenCLCalcKineticEnergyKernel(name, platform, cl);
// if (name == RemoveCMMotionKernel::Name())
// return new OpenCLRemoveCMMotionKernel(name, platform, cl);
if (name == RemoveCMMotionKernel::Name())
return new OpenCLRemoveCMMotionKernel(name, platform, cl);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
......@@ -1184,15 +1184,23 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
int numAtoms = cl.getNumAtoms();
double dt = integrator.getStepSize();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl_float>(1, dt);
kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl_float>(1, dt);
kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(4, integration.getPosDelta().getDeviceBuffer());
}
// Call the first integration kernel.
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl_float>(1, dt);
kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
cl.executeKernel(kernel1, numAtoms);
// Apply constraints.
......@@ -1201,11 +1209,6 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
// Call the second integration kernel.
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl_float>(1, dt);
kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(4, integration.getPosDelta().getDeviceBuffer());
cl.executeKernel(kernel2, numAtoms);
// Update the time and step count.
......@@ -1241,6 +1244,29 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
int numAtoms = cl.getNumAtoms();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, params->getDeviceBuffer());
kernel1.setArg(5, params->getSize()*sizeof(cl_float), NULL);
kernel1.setArg<cl::Buffer>(6, xVector->getDeviceBuffer());
kernel1.setArg<cl::Buffer>(7, vVector->getDeviceBuffer());
kernel1.setArg<cl::Buffer>(8,integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, params->getDeviceBuffer());
kernel2.setArg(4, params->getSize()*sizeof(cl_float), NULL);
kernel2.setArg<cl::Buffer>(5, xVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(6, vVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
kernel3.setArg<cl_int>(0, numAtoms);
kernel3.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
}
int numThreads = cl.getNumThreadBlocks()*cl.ThreadBlockSize;
double temperature = integrator.getTemperature();
double friction = integrator.getFriction();
......@@ -1312,15 +1338,6 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
// Call the first integration kernel.
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, params->getDeviceBuffer());
kernel1.setArg(5, params->getSize()*sizeof(cl_float), NULL);
kernel1.setArg<cl::Buffer>(6, xVector->getDeviceBuffer());
kernel1.setArg<cl::Buffer>(7, vVector->getDeviceBuffer());
kernel1.setArg<cl::Buffer>(8,integration.getRandom().getDeviceBuffer());
kernel1.setArg<cl_uint>(9, integration.prepareRandomNumbers(2*numThreads));
cl.executeKernel(kernel1, numAtoms);
......@@ -1330,14 +1347,6 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
// Call the second integration kernel.
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, params->getDeviceBuffer());
kernel2.setArg(4, params->getSize()*sizeof(cl_float), NULL);
kernel2.setArg<cl::Buffer>(5, xVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(6, vVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*numThreads));
cl.executeKernel(kernel2, numAtoms);
......@@ -1347,9 +1356,6 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
// Call the third integration kernel.
kernel3.setArg<cl_int>(0, numAtoms);
kernel3.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
cl.executeKernel(kernel3, numAtoms);
// Update the time and step count.
......@@ -1527,11 +1533,35 @@ double OpenCLCalcKineticEnergyKernel::execute(ContextImpl& context) {
}
return 0.5*energy;
}
//
//void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
// data.removeCM = true;
// data.cmMotionFrequency = force.getFrequency();
//}
//
//void OpenCLRemoveCMMotionKernel::execute(ContextImpl& context) {
//}
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
if (cmMomentum != NULL)
delete cmMomentum;
}
void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
frequency = force.getFrequency();
int numAtoms = cl.getNumAtoms();
cmMomentum = new OpenCLArray<mm_float4>(cl, (numAtoms+OpenCLContext::ThreadBlockSize-1)/OpenCLContext::ThreadBlockSize, "cmMomentum");
double totalMass = 0.0;
for (int i = 0; i < numAtoms; i++)
totalMass += system.getParticleMass(i);
map<string, string> defines;
defines["INVERSE_TOTAL_MASS"] = doubleToString(1.0/totalMass);
cl::Program program = cl.createProgram(cl.loadSourceFromFile("removeCM.cl"), defines);
kernel1 = cl::Kernel(program, "calcCenterOfMassMomentum");
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, cmMomentum->getDeviceBuffer());
kernel1.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
kernel2 = cl::Kernel(program, "removeCenterOfMassMomentum");
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, cmMomentum->getDeviceBuffer());
kernel2.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
}
void OpenCLRemoveCMMotionKernel::execute(ContextImpl& context) {
cl.executeKernel(kernel1, cl.getNumAtoms());
cl.executeKernel(kernel2, cl.getNumAtoms());
}
......@@ -435,7 +435,7 @@ private:
*/
class OpenCLIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
public:
OpenCLIntegrateVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVerletStepKernel(name, platform), cl(cl) {
OpenCLIntegrateVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVerletStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
}
~OpenCLIntegrateVerletStepKernel();
/**
......@@ -454,6 +454,7 @@ public:
void execute(ContextImpl& context, const VerletIntegrator& integrator);
private:
OpenCLContext& cl;
bool hasInitializedKernels;
cl::Kernel kernel1, kernel2;
};
......@@ -463,7 +464,7 @@ private:
class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public:
OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
params(NULL), xVector(NULL), vVector(NULL) {
params(NULL), xVector(NULL), vVector(NULL), hasInitializedKernels(false) {
}
~OpenCLIntegrateLangevinStepKernel();
/**
......@@ -483,6 +484,7 @@ public:
private:
OpenCLContext& cl;
double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels;
OpenCLArray<cl_float>* params;
OpenCLArray<mm_float4>* xVector;
OpenCLArray<mm_float4>* vVector;
......@@ -622,29 +624,33 @@ private:
std::vector<double> masses;
};
///**
// * This kernel is invoked to remove center of mass motion from the system.
// */
//class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel {
//public:
// OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl) {
// }
// /**
// * Initialize the kernel, setting up the particle masses.
// *
// * @param system the System this kernel will be applied to
// * @param force the CMMotionRemover this kernel will be used for
// */
// void initialize(const System& system, const CMMotionRemover& force);
// /**
// * Execute the kernel.
// *
// * @param context the context in which to execute this kernel
// */
// void execute(ContextImpl& context);
//private:
// OpenCLContext& cl;
//};
/**
* This kernel is invoked to remove center of mass motion from the system.
*/
class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel {
public:
OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl), cmMomentum(NULL) {
}
~OpenCLRemoveCMMotionKernel();
/**
* Initialize the kernel, setting up the particle masses.
*
* @param system the System this kernel will be applied to
* @param force the CMMotionRemover this kernel will be used for
*/
void initialize(const System& system, const CMMotionRemover& force);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
*/
void execute(ContextImpl& context);
private:
OpenCLContext& cl;
int frequency;
OpenCLArray<mm_float4>* cmMomentum;
cl::Kernel kernel1, kernel2;
};
} // namespace OpenMM
......
......@@ -61,7 +61,7 @@ OpenCLPlatform::OpenCLPlatform() {
// registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
// registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
// registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
platformProperties.push_back(OpenCLDeviceIndex());
setPropertyDefaultValue(OpenCLDeviceIndex(), "");
}
......
/**
* Calculate the center of mass momentum.
*/
__kernel void calcCenterOfMassMomentum(int numAtoms, __global float4* velm, __global float4* cmMomentum, __local float4* temp) {
int index = get_global_id(0);
float4 cm = 0.0f;
while (index < numAtoms) {
float4 velocity = velm[index];
cm.xyz += velocity.xyz/velocity.w;
index += get_global_size(0);
}
// Sum the threads in this group.
int thread = get_local_id(0);
temp[thread] = cm;
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef WARPS_ARE_ATOMIC
if (thread < 32) {
temp[thread] += temp[thread+32];
if (thread < 16)
temp[thread] += temp[thread+16];
if (thread < 8)
temp[thread] += temp[thread+8];
if (thread < 4)
temp[thread] += temp[thread+4];
if (thread < 2)
temp[thread] += temp[thread+2];
}
#else
if (thread < 32)
temp[thread] += temp[thread+32];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 16)
temp[thread] += temp[thread+16];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 8)
temp[thread] += temp[thread+8];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 4)
temp[thread] += temp[thread+4];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 2)
temp[thread] += temp[thread+2];
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if (thread == 0)
cmMomentum[get_group_id(0)] = temp[thread]+temp[thread+1];
}
/**
* Remove center of mass motion.
*/
__kernel void removeCenterOfMassMomentum(int numAtoms, __global float4* velm, __global float4* cmMomentum, __local float4* temp) {
// First sum all of the momenta that were calculated by individual groups.
int index = get_local_id(0);
float4 cm = 0.0f;
while (index < get_num_groups(0)) {
cm += cmMomentum[index];
index += get_local_size(0);
}
int thread = get_local_id(0);
temp[thread] = cm;
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef WARPS_ARE_ATOMIC
if (thread < 32) {
temp[thread] += temp[thread+32];
if (thread < 16)
temp[thread] += temp[thread+16];
if (thread < 8)
temp[thread] += temp[thread+8];
if (thread < 4)
temp[thread] += temp[thread+4];
if (thread < 2)
temp[thread] += temp[thread+2];
}
#else
if (thread < 32)
temp[thread] += temp[thread+32];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 16)
temp[thread] += temp[thread+16];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 8)
temp[thread] += temp[thread+8];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 4)
temp[thread] += temp[thread+4];
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 2)
temp[thread] += temp[thread+2];
barrier(CLK_LOCAL_MEM_FENCE);
#endif
cm = (temp[0]+temp[1])*INVERSE_TOTAL_MASS;
// Now remove the center of mass velocity from each atom.
index = get_global_id(0);
while (index < numAtoms) {
velm[index].xyz -= cm.xyz;
index += get_global_size(0);
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the OpenCL implementation of AndersenThermostat.
*/
#include "../../../tests/AssertionUtilities.h"
#include "openmm/CMMotionRemover.h"
#include "openmm/Context.h"
#include "OpenCLPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "../src/sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
Vec3 calcCM(const vector<Vec3>& values, System& system) {
Vec3 cm;
for (int j = 0; j < system.getNumParticles(); ++j) {
cm[0] += values[j][0]*system.getParticleMass(j);
cm[1] += values[j][1]*system.getParticleMass(j);
cm[2] += values[j][2]*system.getParticleMass(j);
}
return cm;
}
void testMotionRemoval(Integrator& integrator) {
const int numParticles = 8;
OpenCLPlatform platform;
System system;
HarmonicBondForce* bonds = new HarmonicBondForce();
bonds->addBond(2, 3, 2.0, 0.5);
system.addForce(bonds);
NonbondedForce* nonbonded = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(i+1);
nonbonded->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(nonbonded);
CMMotionRemover* remover = new CMMotionRemover();
system.addForce(remover);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
init_gen_rand(0);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Now run it for a while and see if the center of mass remains fixed.
Vec3 cmPos = calcCM(context.getState(State::Positions).getPositions(), system);
for (int i = 0; i < 1000; ++i) {
integrator.step(1);
State state = context.getState(State::Positions | State::Velocities);
Vec3 pos = calcCM(state.getPositions(), system);
ASSERT_EQUAL_VEC(cmPos, pos, 1e-2);
Vec3 vel = calcCM(state.getVelocities(), system);
if (i > 0) {
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), vel, 1e-2);
}
}
}
int main() {
try {
LangevinIntegrator langevin(0.0, 1e-5, 0.01);
testMotionRemoval(langevin);
VerletIntegrator verlet(0.01);
testMotionRemoval(verlet);
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment