Commit 8656e3ba authored by peastman's avatar peastman
Browse files

Created OpenCL implementation of ring polymer contraction

parent 863447a6
......@@ -169,7 +169,7 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
replacements.clear();
replacements["NUM_CONTRACTED_COPIES"] = cu.intToString(copies);
replacements["POS_SCALE"] = cu.doubleToString(1.0/numCopies);
replacements["FORCE_SCALE"] = cu.doubleToString(1.0/copies);
replacements["FORCE_SCALE"] = cu.doubleToString(0x100000000/(double) copies);
replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true);
replacements["FFT_Q_BACKWARD"] = createFFT(copies, "q", false);
replacements["FFT_F_FORWARD"] = createFFT(copies, "f", true);
......
......@@ -87,7 +87,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
if (indexInBlock < NUM_CONTRACTED_COPIES) {
freal[indexInBlock] = make_mixed3(contracted[forceIndex], contracted[forceIndex+PADDED_NUM_ATOMS], contracted[forceIndex+PADDED_NUM_ATOMS*2]);
freal[indexInBlock] = make_mixed3(contracted[forceIndex]*forceScale, contracted[forceIndex+PADDED_NUM_ATOMS]*forceScale, contracted[forceIndex+PADDED_NUM_ATOMS*2]*forceScale);
fimag[indexInBlock] = make_mixed3(0);
}
__syncthreads();
......@@ -114,8 +114,8 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
// Store results.
force[forceIndex] = FORCE_SCALE*freal[indexInBlock].x;
force[forceIndex+PADDED_NUM_ATOMS] = FORCE_SCALE*freal[indexInBlock].y;
force[forceIndex+PADDED_NUM_ATOMS*2] = FORCE_SCALE*freal[indexInBlock].z;
force[forceIndex] = (long long) (FORCE_SCALE*freal[indexInBlock].x);
force[forceIndex+PADDED_NUM_ATOMS] = (long long) (FORCE_SCALE*freal[indexInBlock].y);
force[forceIndex+PADDED_NUM_ATOMS*2] = (long long) (FORCE_SCALE*freal[indexInBlock].z);
}
}
......@@ -440,6 +440,7 @@ int main(int argc, char* argv[]) {
testParaHydrogen();
testCMMotionRemoval();
testVirtualSites();
testContractions();
}
catch(const std::exception& e) {
std::cout << "exception: " << e.what() << std::endl;
......
......@@ -48,6 +48,10 @@ OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
delete positions;
if (velocities != NULL)
delete velocities;
if (contractedForces != NULL)
delete contractedForces;
if (contractedPositions != NULL)
delete contractedPositions;
}
void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
......@@ -86,6 +90,34 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
temp[i] = mm_float4(0, 0, 0, 1);
velocities->upload(temp);
}
// Build a list of contractions.
groupsNotContracted = -1;
const map<int, int>& contractions = integrator.getContractions();
int maxContractedCopies = 0;
for (map<int, int>::const_iterator iter = contractions.begin(); iter != contractions.end(); ++iter) {
int group = iter->first;
int copies = iter->second;
if (group < 0 || group > 31)
throw OpenMMException("RPMDIntegrator: Force group must be between 0 and 31");
if (copies < 0 || copies > numCopies)
throw OpenMMException("RPMDIntegrator: Number of copies for contraction cannot be greater than the total number of copies being simulated");
if (copies != numCopies) {
if (groupsByCopies.find(copies) == groupsByCopies.end()) {
groupsByCopies[copies] = 1<<group;
groupsNotContracted -= 1<<group;
if (copies > maxContractedCopies)
maxContractedCopies = copies;
}
else
groupsByCopies[copies] |= 1<<group;
}
}
if (maxContractedCopies > 0) {
contractedForces = new OpenCLArray(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
contractedPositions = new OpenCLArray(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
}
// Create kernels.
......@@ -109,6 +141,23 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
copyToContextKernel = cl::Kernel(program, "copyDataToContext");
copyFromContextKernel = cl::Kernel(program, "copyDataFromContext");
translateKernel = cl::Kernel(program, "applyCellTranslations");
// Create kernels for doing contractions.
for (map<int, int>::const_iterator iter = groupsByCopies.begin(); iter != groupsByCopies.end(); ++iter) {
int copies = iter->first;
replacements.clear();
replacements["NUM_CONTRACTED_COPIES"] = cl.intToString(copies);
replacements["POS_SCALE"] = cl.doubleToString(1.0/numCopies);
replacements["FORCE_SCALE"] = cl.doubleToString(1.0/copies);
replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true);
replacements["FFT_Q_BACKWARD"] = createFFT(copies, "q", false);
replacements["FFT_F_FORWARD"] = createFFT(copies, "f", true);
replacements["FFT_F_BACKWARD"] = createFFT(numCopies, "f", false);
program = cl.createProgram(cl.replaceStrings(OpenCLRpmdKernelSources::rpmdContraction, replacements), defines, "");
positionContractionKernels[copies] = cl::Kernel(program, "contractPositions");
forceContractionKernels[copies] = cl::Kernel(program, "contractForces");
}
}
void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) {
......@@ -124,16 +173,20 @@ void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) {
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(3, velocities->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer());
for (map<int, int>::const_iterator iter = groupsByCopies.begin(); iter != groupsByCopies.end(); ++iter) {
int copies = iter->first;
positionContractionKernels[copies].setArg<cl::Buffer>(0, positions->getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions->getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(0, forces->getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
}
}
void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
......@@ -201,15 +254,51 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
}
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions->getDeviceBuffer());
for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(5, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.computeVirtualSites();
context.updateContextState();
context.calcForcesAndEnergy(true, false);
context.calcForcesAndEnergy(true, false, groupsNotContracted);
copyFromContextKernel.setArg<cl_int>(7, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
// Now loop over contractions and compute forces from them.
if (groupsByCopies.size() > 0) {
copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions->getDeviceBuffer());
for (map<int, int>::const_iterator iter = groupsByCopies.begin(); iter != groupsByCopies.end(); ++iter) {
int copies = iter->first;
int groupFlags = iter->second;
// Find the contracted positions.
cl.executeKernel(positionContractionKernels[copies], numParticles*numCopies, workgroupSize);
// Compute forces.
for (int i = 0; i < copies; i++) {
copyToContextKernel.setArg<cl_int>(5, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
context.computeVirtualSites();
context.calcForcesAndEnergy(true, false, groupFlags);
copyFromContextKernel.setArg<cl_int>(7, i);
cl.executeKernel(copyFromContextKernel, cl.getNumAtoms());
}
// Apply the forces to the original copies.
cl.executeKernel(forceContractionKernels[copies], numParticles*numCopies, workgroupSize);
}
}
}
double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, const RPMDIntegrator& integrator) {
......@@ -269,6 +358,7 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
if (!hasInitializedKernel)
initializeKernels(context);
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, copy);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
}
......
......@@ -35,7 +35,7 @@
#include "openmm/RpmdKernels.h"
#include "OpenCLContext.h"
#include "OpenCLArray.h"
#include <map>
namespace OpenMM {
/**
......@@ -45,7 +45,7 @@ namespace OpenMM {
class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL) {
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
}
~OpenCLIntegrateRPMDStepKernel();
/**
......@@ -90,10 +90,16 @@ private:
OpenCLContext& cl;
bool hasInitializedKernel;
int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies;
int groupsNotContracted;
OpenCLArray* forces;
OpenCLArray* positions;
OpenCLArray* velocities;
OpenCLArray* contractedForces;
OpenCLArray* contractedPositions;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, cl::Kernel> positionContractionKernels;
std::map<int, cl::Kernel> forceContractionKernels;
};
} // namespace OpenMM
......
mixed4 multiplyComplexRealPart(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2r-c1.y*c2i;
}
mixed4 multiplyComplexImagPart(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2i+c1.y*c2r;
}
mixed4 multiplyComplexRealPartConj(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2r+c1.y*c2i;
}
mixed4 multiplyComplexImagPartConj(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2i-c1.y*c2r;
}
/**
* Compute the contracted positions
*/
__kernel void contractPositions(__global mixed4* posq, __global mixed4* contracted) {
const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart;
__local mixed4 q[2*THREAD_BLOCK_SIZE];
__local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES];
__local mixed4* qreal = &q[blockStart];
__local mixed4* qimag = &q[blockStart+get_local_size(0)];
__local mixed4* tempreal = &temp[blockStart];
__local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the particle position.
mixed4 particlePosq = convert_mixed4(posq[particle+indexInBlock*PADDED_NUM_ATOMS]);
qreal[indexInBlock] = particlePosq;
qimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f);
// Forward FFT.
barrier(CLK_LOCAL_MEM_FENCE);
FFT_Q_FORWARD
if (NUM_CONTRACTED_COPIES > 1) {
// Compress the data to remove high frequencies.
int start = (NUM_CONTRACTED_COPIES+1)/2;
tempreal[indexInBlock] = qreal[indexInBlock];
tempimag[indexInBlock] = qimag[indexInBlock];
barrier(CLK_LOCAL_MEM_FENCE);
if (indexInBlock < NUM_CONTRACTED_COPIES) {
qreal[indexInBlock] = tempreal[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
}
barrier(CLK_LOCAL_MEM_FENCE);
FFT_Q_BACKWARD
}
// Store results.
if (indexInBlock < NUM_CONTRACTED_COPIES)
contracted[particle+indexInBlock*PADDED_NUM_ATOMS] = (mixed4) (POS_SCALE*qreal[indexInBlock].x, POS_SCALE*qreal[indexInBlock].y, POS_SCALE*qreal[indexInBlock].z, particlePosq.w);
}
}
/**
* Apply the contracted forces to all copies.
*/
__kernel void contractForces(__global real4* force, __global real4* contracted) {
const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart;
__local mixed4 f[2*THREAD_BLOCK_SIZE];
__local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES];
__local mixed4* freal = &f[blockStart];
__local mixed4* fimag = &f[blockStart+get_local_size(0)];
__local mixed4* tempreal = &temp[blockStart];
__local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the force.
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
if (indexInBlock < NUM_CONTRACTED_COPIES) {
freal[indexInBlock] = convert_mixed4(contracted[index]);
fimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f);
}
barrier(CLK_LOCAL_MEM_FENCE);
// Forward FFT.
if (NUM_CONTRACTED_COPIES > 1) {
FFT_F_FORWARD
}
// Set the high frequency components to 0.
int start = (NUM_CONTRACTED_COPIES+1)/2;
int end = NUM_COPIES-NUM_CONTRACTED_COPIES+start;
tempreal[indexInBlock] = freal[indexInBlock];
tempimag[indexInBlock] = fimag[indexInBlock];
barrier(CLK_LOCAL_MEM_FENCE);
if (indexInBlock >= start) {
freal[indexInBlock] = (indexInBlock < end ? (mixed4) (0.0f, 0.0f, 0.0f, 0.0f) : tempreal[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
fimag[indexInBlock] = (indexInBlock < end ? (mixed4) (0.0f, 0.0f, 0.0f, 0.0f) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
}
barrier(CLK_LOCAL_MEM_FENCE);
FFT_F_BACKWARD
// Store results.
force[index] = FORCE_SCALE*freal[indexInBlock];
}
}
......@@ -356,6 +356,82 @@ void testVirtualSites() {
ASSERT_USUALLY_EQUAL_TOL(expectedKE, meanKE, 1e-2);
}
void testContractions() {
const int gridSize = 3;
const int numMolecules = gridSize*gridSize*gridSize;
const int numParticles = numMolecules*2;
const int numCopies = 10;
const double spacing = 2.0;
const double cutoff = 3.0;
const double boxSize = spacing*(gridSize+1);
const double temperature = 300.0;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
HarmonicBondForce* bonds = new HarmonicBondForce();
system.addForce(bonds);
NonbondedForce* nonbonded = new NonbondedForce();
nonbonded->setCutoffDistance(cutoff);
nonbonded->setNonbondedMethod(NonbondedForce::PME);
nonbonded->setForceGroup(1);
nonbonded->setReciprocalSpaceForceGroup(2);
system.addForce(nonbonded);
// Create a cloud of molecules.
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numMolecules; i++) {
system.addParticle(1.0);
system.addParticle(1.0);
nonbonded->addParticle(-0.2, 0.2, 0.2);
nonbonded->addParticle(0.2, 0.2, 0.2);
nonbonded->addException(2*i, 2*i+1, 0, 1, 0);
bonds->addBond(2*i, 2*i+1, 1.0, 10000.0);
}
map<int, int> contractions;
contractions[1] = 3;
contractions[2] = 1;
RPMDIntegrator integ(numCopies, temperature, 10.0, 0.001, contractions);
Platform& platform = Platform::getPlatformByName("OpenCL");
Context context(system, integ, platform);
for (int copy = 0; copy < numCopies; copy++) {
for (int i = 0; i < gridSize; i++)
for (int j = 0; j < gridSize; j++)
for (int k = 0; k < gridSize; k++) {
Vec3 pos = Vec3(spacing*(i+0.02*genrand_real2(sfmt)), spacing*(j+0.02*genrand_real2(sfmt)), spacing*(k+0.02*genrand_real2(sfmt)));
int index = k+gridSize*(j+gridSize*i);
positions[2*index] = pos;
positions[2*index+1] = Vec3(pos[0]+1.0, pos[1], pos[2]);
}
integ.setPositions(copy, positions);
}
// Check the temperature.
const int numSteps = 1000;
integ.step(1000);
vector<double> ke(numCopies, 0.0);
for (int i = 0; i < numSteps; i++) {
integ.step(1);
vector<State> state(numCopies);
for (int j = 0; j < numCopies; j++)
state[j] = integ.getState(j, State::Velocities, true);
for (int j = 0; j < numParticles; j++) {
for (int k = 0; k < numCopies; k++) {
Vec3 v = state[k].getVelocities()[j];
ke[k] += 0.5*system.getParticleMass(j)*v.dot(v);
}
}
}
double meanKE = 0.0;
for (int i = 0; i < numCopies; i++)
meanKE += ke[i];
meanKE /= numSteps*numCopies;
double expectedKE = 0.5*numCopies*numParticles*3*BOLTZ*temperature;
ASSERT_USUALLY_EQUAL_TOL(expectedKE, meanKE, 1e-2);
}
int main(int argc, char* argv[]) {
try {
registerRPMDOpenCLKernelFactories();
......@@ -365,6 +441,7 @@ int main(int argc, char* argv[]) {
testParaHydrogen();
testCMMotionRemoval();
testVirtualSites();
testContractions();
}
catch(const std::exception& e) {
std::cout << "exception: " << e.what() << std::endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment