Commit f4dc3110 authored by Peter Eastman's avatar Peter Eastman
Browse files

CUDA implementation of RMSDForce

parent 2855d12e
...@@ -1278,6 +1278,59 @@ private: ...@@ -1278,6 +1278,59 @@ private:
CUfunction copyStateKernel, copyForcesKernel, addForcesKernel; CUfunction copyStateKernel, copyForcesKernel, addForcesKernel;
}; };
/**
* This kernel is invoked by RMSDForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcRMSDForceKernel : public CalcRMSDForceKernel {
public:
CudaCalcRMSDForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcRMSDForceKernel(name, platform),
cu(cu), referencePos(NULL), particles(NULL), buffer(NULL) {
}
~CudaCalcRMSDForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the RMSDForce this kernel will be used for
*/
void initialize(const System& system, const RMSDForce& force);
/**
* Record the reference positions and particle indices.
*/
void recordParameters(const RMSDForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* This is the internal implementation of execute(), templatized on whether we're
* using single or double precision.
*/
template <class REAL>
double executeImpl(ContextImpl& context);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the RMSDForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const RMSDForce& force);
private:
class ForceInfo;
CudaContext& cu;
ForceInfo* info;
double sumNormRef;
CudaArray* referencePos;
CudaArray* particles;
CudaArray* buffer;
CUfunction kernel1, kernel2;
};
/** /**
* This kernel is invoked by VerletIntegrator to take one time step. * This kernel is invoked by VerletIntegrator to take one time step.
*/ */
......
...@@ -110,6 +110,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -110,6 +110,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem());
if (name == CalcCustomCVForceKernel::Name()) if (name == CalcCustomCVForceKernel::Name())
return new CudaCalcCustomCVForceKernel(name, platform, cu); return new CudaCalcCustomCVForceKernel(name, platform, cu);
if (name == CalcRMSDForceKernel::Name())
return new CudaCalcRMSDForceKernel(name, platform, cu);
if (name == CalcCustomManyParticleForceKernel::Name()) if (name == CalcCustomManyParticleForceKernel::Name())
return new CudaCalcCustomManyParticleForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcCustomManyParticleForceKernel(name, platform, cu, context.getSystem());
if (name == CalcGayBerneForceKernel::Name()) if (name == CalcGayBerneForceKernel::Name())
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#include "ReferenceTabulatedFunction.h" #include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "jama_eig.h"
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <set> #include <set>
...@@ -6784,6 +6785,200 @@ void CudaCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& i ...@@ -6784,6 +6785,200 @@ void CudaCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& i
innerContext.setParameter(param.first, context.getParameter(param.first)); innerContext.setParameter(param.first, context.getParameter(param.first));
} }
class CudaCalcRMSDForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const RMSDForce& force) : force(force) {
updateParticles();
}
void updateParticles() {
particles.clear();
for (int i : force.getParticles())
particles.insert(i);
}
bool areParticlesIdentical(int particle1, int particle2) {
bool include1 = (particles.find(particle1) != particles.end());
bool include2 = (particles.find(particle2) != particles.end());
return (include1 == include2);
}
private:
const RMSDForce& force;
set<int> particles;
};
CudaCalcRMSDForceKernel::~CudaCalcRMSDForceKernel() {
if (referencePos != NULL)
delete referencePos;
if (particles != NULL)
delete particles;
if (buffer != NULL)
delete buffer;
}
void CudaCalcRMSDForceKernel::initialize(const System& system, const RMSDForce& force) {
// Create data structures.
bool useDouble = cu.getUseDoublePrecision();
int elementSize = (useDouble ? sizeof(double) : sizeof(float));
int numParticles = force.getParticles().size();
if (numParticles == 0)
numParticles = system.getNumParticles();
referencePos = new CudaArray(cu, system.getNumParticles(), 4*elementSize, "referencePos");
particles = CudaArray::create<int>(cu, numParticles, "particles");
buffer = new CudaArray(cu, 13, elementSize, "buffer");
recordParameters(force);
info = new ForceInfo(force);
cu.addForce(info);
// Create the kernels.
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::rmsd);
kernel1 = cu.getKernel(module, "computeRMSDPart1");
kernel2 = cu.getKernel(module, "computeRMSDForces");
}
void CudaCalcRMSDForceKernel::recordParameters(const RMSDForce& force) {
// Record the parameters and center the reference positions.
vector<int> particleVec = force.getParticles();
if (particleVec.size() == 0)
for (int i = 0; i < cu.getNumAtoms(); i++)
particleVec.push_back(i);
vector<Vec3> centeredPositions = force.getReferencePositions();
Vec3 center;
for (int i : particleVec)
center += centeredPositions[i];
center /= particleVec.size();
for (Vec3& p : centeredPositions)
p -= center;
// Upload them to the device.
particles->upload(particleVec);
if (cu.getUseDoublePrecision()) {
vector<double4> pos;
for (Vec3 p : centeredPositions)
pos.push_back(make_double4(p[0], p[1], p[2], 0));
referencePos->upload(pos);
}
else {
vector<float4> pos;
for (Vec3 p : centeredPositions)
pos.push_back(make_float4(p[0], p[1], p[2], 0));
referencePos->upload(pos);
}
// Record the sum of the norms of the reference positions.
sumNormRef = 0.0;
for (int i : particleVec) {
Vec3 p = centeredPositions[i];
sumNormRef += p.dot(p);
}
}
double CudaCalcRMSDForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (cu.getUseDoublePrecision())
return executeImpl<double>(context);
return executeImpl<float>(context);
}
template <class REAL>
double CudaCalcRMSDForceKernel::executeImpl(ContextImpl& context) {
// Execute the first kernel.
int numParticles = particles->getSize();
int blockSize = 128;
void* args1[] = {&numParticles, &cu.getPosq().getDevicePointer(), &referencePos->getDevicePointer(),
&particles->getDevicePointer(), &buffer->getDevicePointer()};
cu.executeKernel(kernel1, args1, blockSize, blockSize, blockSize*sizeof(REAL));
// Download the results, build the F matrix, and find the maximum eigenvalue
// and eigenvector.
vector<REAL> b;
buffer->download(b);
Array2D<double> F(4, 4);
F[0][0] = b[0*3+0] + b[1*3+1] + b[2*3+2];
F[1][0] = b[1*3+2] - b[2*3+1];
F[2][0] = b[2*3+0] - b[0*3+2];
F[3][0] = b[0*3+1] - b[1*3+0];
F[0][1] = b[1*3+2] - b[2*3+1];
F[1][1] = b[0*3+0] - b[1*3+1] - b[2*3+2];
F[2][1] = b[0*3+1] + b[1*3+0];
F[3][1] = b[0*3+2] + b[2*3+0];
F[0][2] = b[2*3+0] - b[0*3+2];
F[1][2] = b[0*3+1] + b[1*3+0];
F[2][2] = -b[0*3+0] + b[1*3+1] - b[2*3+2];
F[3][2] = b[1*3+2] + b[2*3+1];
F[0][3] = b[0*3+1] - b[1*3+0];
F[1][3] = b[0*3+2] + b[2*3+0];
F[2][3] = b[1*3+2] + b[2*3+1];
F[3][3] = -b[0*3+0] - b[1*3+1] + b[2*3+2];
JAMA::Eigenvalue<double> eigen(F);
Array1D<double> values;
eigen.getRealEigenvalues(values);
Array2D<double> vectors;
eigen.getV(vectors);
// Compute the RMSD.
double msd = (sumNormRef+b[9]-2*values[3])/numParticles;
if (msd < 1e-20) {
// The particles are perfectly aligned, so all the forces should be zero.
// Numerical error can lead to NaNs, so just return 0 now.
return 0.0;
}
double rmsd = sqrt(msd);
b[9] = rmsd;
// Compute the rotation matrix.
double q[] = {vectors[0][3], vectors[1][3], vectors[2][3], vectors[3][3]};
double q00 = q[0]*q[0], q01 = q[0]*q[1], q02 = q[0]*q[2], q03 = q[0]*q[3];
double q11 = q[1]*q[1], q12 = q[1]*q[2], q13 = q[1]*q[3];
double q22 = q[2]*q[2], q23 = q[2]*q[3];
double q33 = q[3]*q[3];
b[0] = q00+q11-q22-q33;
b[1] = 2*(q12-q03);
b[2] = 2*(q13+q02);
b[3] = 2*(q12+q03);
b[4] = q00-q11+q22-q33;
b[5] = 2*(q23-q01);
b[6] = 2*(q13-q02);
b[7] = 2*(q23+q01);
b[8] = q00-q11-q22+q33;
// Upload it to the device and invoke the kernel to apply forces.
buffer->upload(b);
int paddedNumAtoms = cu.getPaddedNumAtoms();
void* args2[] = {&numParticles, &paddedNumAtoms, &cu.getPosq().getDevicePointer(), &referencePos->getDevicePointer(),
&particles->getDevicePointer(), &buffer->getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(kernel2, args2, numParticles);
return rmsd;
}
void CudaCalcRMSDForceKernel::copyParametersToContext(ContextImpl& context, const RMSDForce& force) {
if (referencePos->getSize() != force.getReferencePositions().size())
throw OpenMMException("updateParametersInContext: The number of reference positions has changed");
int numParticles = force.getParticles().size();
if (numParticles == 0)
numParticles = context.getSystem().getNumParticles();
if (numParticles != particles->getSize()) {
// Recreate the particles array.
delete particles;
particles = NULL;
particles = CudaArray::create<int>(cu, numParticles, "particles");
}
recordParameters(force);
// Mark that the current reordering may be invalid.
info->updateParticles();
cu.invalidateMolecules(info);
}
CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() { CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
} }
......
...@@ -92,6 +92,7 @@ CudaPlatform::CudaPlatform() { ...@@ -92,6 +92,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCVForceKernel::Name(), factory); registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
registerKernelFactory(CalcRMSDForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory); registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory); registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
......
// This file contains kernels to compute the RMSD and its gradient using the algorithm described
// in Coutsias et al, "Using quaternions to calculate RMSD" (doi: 10.1002/jcc.20110).
/**
* Sum a value over all threads.
*/
__device__ real reduceValue(real value, real* temp) {
const int thread = threadIdx.x;
temp[thread] = value;
__syncthreads();
for (uint step = 1; step < blockDim.x; step *= 2) {
if (thread+step < blockDim.x && thread%(2*step) == 0)
temp[thread] = temp[thread] + temp[thread+step];
__syncthreads();
}
return temp[0];
}
/**
* Perform the first step of computing the RMSD. This is executed as a single work group.
*/
extern "C" __global__ void computeRMSDPart1(int numParticles, const real4* __restrict__ posq, const real4* __restrict__ referencePos,
const int* __restrict__ particles, real* buffer) {
extern __shared__ real temp[];
// Compute the center of the particle positions.
real3 center = make_real3(0);
for (int i = threadIdx.x; i < numParticles; i += blockDim.x)
center += trimTo3(posq[particles[i]]);
center.x = reduceValue(center.x, temp)/numParticles;
center.y = reduceValue(center.y, temp)/numParticles;
center.z = reduceValue(center.z, temp)/numParticles;
// Compute the correlation matrix.
real R[3][3] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
real sum = 0;
for (int i = threadIdx.x; i < numParticles; i += blockDim.x) {
int index = particles[i];
real3 pos = trimTo3(posq[index]) - center;
real3 refPos = trimTo3(referencePos[index]);
R[0][0] += pos.x*refPos.x;
R[0][1] += pos.x*refPos.y;
R[0][2] += pos.x*refPos.z;
R[1][0] += pos.y*refPos.x;
R[1][1] += pos.y*refPos.y;
R[1][2] += pos.y*refPos.z;
R[2][0] += pos.z*refPos.x;
R[2][1] += pos.z*refPos.y;
R[2][2] += pos.z*refPos.z;
sum += dot(pos, pos);
}
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
R[i][j] = reduceValue(R[i][j], temp);
sum = reduceValue(sum, temp);
// Copy everything into the output buffer to send back to the host.
if (threadIdx.x == 0) {
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
buffer[3*i+j] = R[i][j];
buffer[9] = sum;
buffer[10] = center.x;
buffer[11] = center.y;
buffer[12] = center.z;
}
}
/**
* Apply forces based on the RMSD.
*/
extern "C" __global__ void computeRMSDForces(int numParticles, int paddedNumAtoms, const real4* __restrict__ posq, const real4* __restrict__ referencePos,
const int* __restrict__ particles, const real* buffer, unsigned long long* __restrict__ forceBuffers) {
real3 center = make_real3(buffer[10], buffer[11], buffer[12]);
real scale = 1 / (real) (buffer[9]*numParticles);
for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numParticles; i += blockDim.x*gridDim.x) {
int index = particles[i];
real3 pos = trimTo3(posq[index]) - center;
real3 refPos = trimTo3(referencePos[index]);
real3 rotatedRef = make_real3(buffer[0]*refPos.x + buffer[3]*refPos.y + buffer[6]*refPos.z,
buffer[1]*refPos.x + buffer[4]*refPos.y + buffer[7]*refPos.z,
buffer[2]*refPos.x + buffer[5]*refPos.y + buffer[8]*refPos.z);
real3 force = (rotatedRef-pos)*scale;
atomicAdd(&forceBuffers[index], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[index+paddedNumAtoms], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[index+2*paddedNumAtoms], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestRMSDForce.h"
void runPlatformTests() {
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment