Created CUDA implementation of CMAPTorsionForce

2a313e48 · Peter Eastman · 43b1bc95 · 2a313e48 · 2a313e48 · 2a313e48
Commit 2a313e48 authored Jul 20, 2010 by Peter Eastman
8 changed files
--- a/platforms/cuda/src/CudaKernelFactory.cpp
+++ b/platforms/cuda/src/CudaKernelFactory.cpp
@@ -51,6 +51,8 @@ OPENMMCUDA_EXPORT KernelImpl* CudaKernelFactory::createKernelImpl(std::string na
        return new CudaCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
    if (name == CalcRBTorsionForceKernel::Name())
        return new CudaCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
+    if (name == CalcCMAPTorsionForceKernel::Name())
+        return new CudaCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
    if (name == CalcCustomTorsionForceKernel::Name())
        return new CudaCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
    if (name == CalcNonbondedForceKernel::Name())

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -28,6 +28,7 @@
 #include "openmm/LangevinIntegrator.h"
 #include "openmm/Context.h"
 #include "openmm/OpenMMException.h"
+#include "openmm/internal/CMAPTorsionForceImpl.h"
 #include "openmm/internal/ContextImpl.h"
 #include "openmm/internal/NonbondedForceImpl.h"
 #include "kernels/gputypes.h"
@@ -421,6 +422,76 @@ double CudaCalcRBTorsionForceKernel::executeEnergy(ContextImpl& context) {
    return 0.0;
 }
+CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
+    if (coefficients != NULL)
+        delete coefficients;
+    if (mapPositions != NULL)
+        delete mapPositions;
+    if (torsionMaps != NULL)
+        delete torsionMaps;
+    if (torsionIndices != NULL)
+        delete torsionIndices;
+}
+void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
+    numTorsions = force.getNumTorsions();
+    if (numTorsions == 0)
+        return;
+    int numMaps = force.getNumMaps();
+    vector<float4> coeffVec;
+    vector<int2> mapPositionsVec(numMaps);
+    vector<double> energy;
+    vector<vector<double> > c;
+    int currentPosition = 0;
+    mapPositions = new CUDAStream<int2>(numMaps, 1, "cmapTorsionMapPositions");
+    for (int i = 0; i < numMaps; i++) {
+        int size;
+        force.getMapParameters(i, size, energy);
+        CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
+        (*mapPositions)[i] = make_int2(currentPosition, size);
+        currentPosition += 4*size*size;
+        for (int j = 0; j < size*size; j++) {
+            coeffVec.push_back(make_float4(c[j][0], c[j][1], c[j][2], c[j][3]));
+            coeffVec.push_back(make_float4(c[j][4], c[j][5], c[j][6], c[j][7]));
+            coeffVec.push_back(make_float4(c[j][8], c[j][9], c[j][10], c[j][11]));
+            coeffVec.push_back(make_float4(c[j][12], c[j][13], c[j][14], c[j][15]));
+        }
+    }
+    coefficients = new CUDAStream<float4>((int) coeffVec.size(), 1, "cmapTorsionCoefficients");;
+    for (int i = 0; i < (int) coeffVec.size(); i++)
+        (*coefficients)[i] = coeffVec[i];
+    torsionMaps = new CUDAStream<int>(numTorsions, 1, "cmapTorsionMaps");
+    torsionIndices = new CUDAStream<int4>(4*numTorsions, 1, "cmapTorsionIndices");
+    vector<int> forceBufferCounter(system.getNumParticles(), 0);
+    for (int i = 0; i < numTorsions; i++) {
+        int map, a1, a2, a3, a4, b1, b2, b3, b4;
+        force.getTorsionParameters(i, map, a1, a2, a3, a4, b1, b2, b3, b4);
+        (*torsionMaps)[i] = map;
+        (*torsionIndices)[i*4] = make_int4(a1, a2, a3, a4);
+        (*torsionIndices)[i*4+1] = make_int4(b1, b2, b3, b4);
+        (*torsionIndices)[i*4+2] = make_int4(forceBufferCounter[a1]++, forceBufferCounter[a2]++, forceBufferCounter[a3]++, forceBufferCounter[a4]++);
+        (*torsionIndices)[i*4+3] = make_int4(forceBufferCounter[b1]++, forceBufferCounter[b2]++, forceBufferCounter[b3]++, forceBufferCounter[b4]++);
+    }
+    coefficients->Upload();
+    mapPositions->Upload();
+    torsionMaps->Upload();
+    torsionIndices->Upload();
+    int maxBuffers = 1;
+    for (int i = 0; i < (int) forceBufferCounter.size(); i++)
+        maxBuffers = max(maxBuffers, forceBufferCounter[i]);
+    if (maxBuffers > data.gpu->sim.outputBuffers)
+        data.gpu->sim.outputBuffers = maxBuffers;
+}
+void CudaCalcCMAPTorsionForceKernel::executeForces(ContextImpl& context) {
+    kCalculateCMAPTorsionForces(data.gpu, *coefficients, *mapPositions, *torsionIndices, *torsionMaps);
+}
+double CudaCalcCMAPTorsionForceKernel::executeEnergy(ContextImpl& context) {
+    kCalculateCMAPTorsionForces(data.gpu, *coefficients, *mapPositions, *torsionIndices, *torsionMaps);
+    return 0.0;
+}
 CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
 }

--- a/platforms/cuda/src/CudaKernels.h
+++ b/platforms/cuda/src/CudaKernels.h
@@ -405,6 +405,46 @@ private:
    System& system;
 };
+/**
+ * This kernel is invoked by CMAPTorsionForce to calculate the forces acting on the system and the energy of the system.
+ */
+class CudaCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
+public:
+    CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
+            CalcCMAPTorsionForceKernel(name, platform), data(data), system(system), coefficients(NULL), mapPositions(NULL),
+            torsionIndices(NULL), torsionMaps(NULL) {
+    }
+    ~CudaCalcCMAPTorsionForceKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param force      the CMAPTorsionForce this kernel will be used for
+     */
+    void initialize(const System& system, const CMAPTorsionForce& force);
+    /**
+     * Execute the kernel to calculate the forces.
+     *
+     * @param context    the context in which to execute this kernel
+     */
+    void executeForces(ContextImpl& context);
+    /**
+     * Execute the kernel to calculate the energy.
+     *
+     * @param context    the context in which to execute this kernel
+     * @return the potential energy due to the CMAPTorsionForce
+     */
+    double executeEnergy(ContextImpl& context);
+private:
+    CudaPlatform::PlatformData& data;
+    System& system;
+    int numTorsions;
+    CUDAStream<float4>* coefficients;
+    CUDAStream<int2>* mapPositions;
+    CUDAStream<int4>* torsionIndices;
+    CUDAStream<int>* torsionMaps;
+};
 /**
 * This kernel is invoked by CustomTorsionForce to calculate the forces acting on the system and the energy of the system.
 */

--- a/platforms/cuda/src/CudaPlatform.cpp
+++ b/platforms/cuda/src/CudaPlatform.cpp
@@ -54,6 +54,7 @@ CudaPlatform::CudaPlatform() {
    registerKernelFactory(CalcCustomAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
+    registerKernelFactory(CalcCMAPTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);

--- a/platforms/cuda/src/kernels/cudaKernels.h
+++ b/platforms/cuda/src/kernels/cudaKernels.h
@@ -42,6 +42,7 @@ extern void kGenerateRandoms(gpuContext gpu);
 extern void kCalculateCDLJObcGbsaForces1(gpuContext gpu);
 extern void kCalculateCDLJGBVIForces1(gpuContext gpu);
 extern void kCalculateCDLJForces(gpuContext gpu);
+extern void kCalculateCMAPTorsionForces(gpuContext gpu, CUDAStream<float4>& coefficients, CUDAStream<int2>& mapPositions, CUDAStream<int4>& torsionIndices, CUDAStream<int>& torsionMaps);
 extern void kCalculateCustomBondForces(gpuContext gpu);
 extern void kCalculateCustomAngleForces(gpuContext gpu);
 extern void kCalculateCustomTorsionForces(gpuContext gpu);

--- a/platforms/cuda/src/kernels/gpu.cpp
+++ b/platforms/cuda/src/kernels/gpu.cpp
@@ -1677,6 +1677,7 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
        (*gpu->psAtomIndex)[i] = i;
    gpu->psAtomIndex->Upload();
    gpu->posCellOffsets.resize(gpu->natoms, make_int3(0, 0, 0));
+    gpu->sim.outputBuffers = 0;
    // Determine randoms
    gpu->seed                           = 1;
    gpu->sim.randomFrames               = 20;
@@ -2183,7 +2184,8 @@ int gpuBuildOutputBuffers(gpuContext gpu)
        gpu->bOutputBufferPerWarp           = false;
        gpu->sim.nonbondOutputBuffers       = gpu->sim.paddedNumberOfAtoms / GRID;
    }
-    gpu->sim.outputBuffers              = gpu->sim.nonbondOutputBuffers;
+    if (gpu->sim.nonbondOutputBuffers > gpu->sim.outputBuffers)
+        gpu->sim.outputBuffers = gpu->sim.nonbondOutputBuffers;
    unsigned int outputBuffers = gpu->sim.outputBuffers;
    for (unsigned int i = 0; i < gpu->sim.paddedNumberOfAtoms; i++)

--- a/platforms/cuda/src/kernels/kCalculateCMAPTorsionForces.cu
+++ b/platforms/cuda/src/kernels/kCalculateCMAPTorsionForces.cu
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+#include <stdio.h>
+#include <cuda.h>
+#include <vector_functions.h>
+#include <cstdlib>
+#include <string>
+#include <iostream>
+#include <fstream>
+using namespace std;
+#include "gputypes.h"
+#include "cudatypes.h"
+#define LOCAL_HACK_PI 3.1415926535897932384626433832795
+#define DOT3(v1, v2) (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z)
+#define CROSS_PRODUCT(v1, v2) make_float3(v1.y*v2.z - v1.z*v2.y, v1.z*v2.x - v1.x*v2.z, v1.x*v2.y - v1.y*v2.x)
+#define GETNORMEDDOTPRODUCT(v1, v2, dp) \
+{ \
+    dp          = DOT3(v1, v2); \
+    float norm1 = DOT3(v1, v1); \
+    float norm2 = DOT3(v2, v2); \
+    dp /= sqrt(norm1 * norm2); \
+    dp = min(dp, 1.0f); \
+    dp = max(dp, -1.0f); \
+}
+#define GETANGLEBETWEENTWOVECTORS(v1, v2, angle) \
+{ \
+    float dp; \
+    GETNORMEDDOTPRODUCT(v1, v2, dp); \
+    if (dp > 0.99f || dp < -0.99f) { \
+        float3 cross = CROSS_PRODUCT(v1, v2); \
+        float scale = DOT3(v1, v1)*DOT3(v2, v2); \
+        angle = asin(sqrt(DOT3(cross, cross)/scale)); \
+        if (dp < 0.0f) \
+            angle = LOCAL_HACK_PI-angle; \
+    } \
+    else { \
+        angle = acos(dp); \
+    } \
+}
+#define GETDIHEDRALANGLEBETWEENTHREEVECTORS(vector1, vector2, vector3, signVector, cp0, cp1, angle) \
+{ \
+    cp0 = CROSS_PRODUCT(vector1, vector2); \
+    cp1 = CROSS_PRODUCT(vector2, vector3); \
+    GETANGLEBETWEENTWOVECTORS(cp0, cp1, angle); \
+    float dp = DOT3(signVector, cp1); \
+    angle = (dp >= 0) ? angle : -angle; \
+}
+__global__
+#if (__CUDA_ARCH__ >= 200)
+__launch_bounds__(GF1XX_LOCALFORCES_THREADS_PER_BLOCK, 1)
+#elif (__CUDA_ARCH__ >= 130)
+__launch_bounds__(GT2XX_LOCALFORCES_THREADS_PER_BLOCK, 1)
+#else
+__launch_bounds__(G8X_LOCALFORCES_THREADS_PER_BLOCK, 1)
+#endif
+void kCalculateCMAPTorsionForces_kernel(int numAtoms, int numTorsions, float4* forceBuffers, float* energyBuffer,
+        float4* posq, float4* coeff, int2* mapPositions, int4* indices, int* maps)
+{
+    const float PI = 3.14159265358979323846f;
+    float energy = 0.0f;
+    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numTorsions; index += blockDim.x*gridDim.x) {
+        int4 atoms1 = indices[4*index];
+        int4 atoms2 = indices[4*index+1];
+        int4 atoms3 = indices[4*index+2];
+        int4 atoms4 = indices[4*index+3];
+        float4 a1 = posq[atoms1.x];
+        float4 a2 = posq[atoms1.y];
+        float4 a3 = posq[atoms1.z];
+        float4 a4 = posq[atoms1.w];
+        float4 b1 = posq[atoms2.x];
+        float4 b2 = posq[atoms2.y];
+        float4 b3 = posq[atoms2.z];
+        float4 b4 = posq[atoms2.w];
+        // Compute the first angle.
+        float3 v0a = make_float3(a1.x-a2.x, a1.y-a2.y, a1.z-a2.z);
+        float3 v1a = make_float3(a3.x-a2.x, a3.y-a2.y, a3.z-a2.z);
+        float3 v2a = make_float3(a3.x-a4.x, a3.y-a4.y, a3.z-a4.z);
+        float3 cp0a, cp1a;
+        float angleA;
+        GETDIHEDRALANGLEBETWEENTHREEVECTORS(v0a, v1a, v2a, v0a, cp0a, cp1a, angleA);
+        angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
+        // Compute the second angle.
+        float3 v0b = make_float3(b1.x-b2.x, b1.y-b2.y, b1.z-b2.z);
+        float3 v1b = make_float3(b3.x-b2.x, b3.y-b2.y, b3.z-b2.z);
+        float3 v2b = make_float3(b3.x-b4.x, b3.y-b4.y, b3.z-b4.z);
+        float3 cp0b, cp1b;
+        float angleB;
+        GETDIHEDRALANGLEBETWEENTHREEVECTORS(v0b, v1b, v2b, v0b, cp0b, cp1b, angleB);
+        angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
+        // Identify which patch this is in.
+        int2 pos = mapPositions[maps[index]];
+        int size = pos.y;
+        float delta = 2.0f*PI/size;
+        int s = (int) (angleA/delta);
+        int t = (int) (angleB/delta);
+        float4 c[4];
+        int coeffIndex = 4*(pos.x+s+size*t);
+        c[0] = coeff[coeffIndex];
+        c[1] = coeff[coeffIndex+1];
+        c[2] = coeff[coeffIndex+2];
+        c[3] = coeff[coeffIndex+3];
+        float da = angleA/delta-s;
+        float db = angleB/delta-t;
+        // Evaluate the spline to determine the energy and gradients.
+        float torsionEnergy = 0.0f;
+        float dEdA = 0.0f;
+        float dEdB = 0.0f;
+        torsionEnergy = da*torsionEnergy + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;
+        dEdA = db*dEdA + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;
+        dEdB = da*dEdB + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;
+        torsionEnergy = da*torsionEnergy + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;
+        dEdA = db*dEdA + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;
+        dEdB = da*dEdB + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;
+        torsionEnergy = da*torsionEnergy + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;
+        dEdA = db*dEdA + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;
+        dEdB = da*dEdB + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;
+        torsionEnergy = da*torsionEnergy + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;
+        dEdA = db*dEdA + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;
+        dEdB = da*dEdB + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;
+        dEdA /= delta;
+        dEdB /= delta;
+        energy += torsionEnergy;
+        // Apply the force to the first torsion.
+        float normCross1 = DOT3(cp0a, cp0a);
+        float normSqrBC = DOT3(v1a, v1a);
+        float normBC = sqrt(normSqrBC);
+        float normCross2 = DOT3(cp1a, cp1a);
+        float dp = 1.0f/normSqrBC;
+        float4 ff = make_float4((-dEdA*normBC)/normCross1, DOT3(v0a, v1a)*dp, DOT3(v2a, v1a)*dp, (dEdA*normBC)/normCross2);
+        float3 internalF0 = make_float3(ff.x*cp0a.x, ff.x*cp0a.y, ff.x*cp0a.z);
+        float3 internalF3 = make_float3(ff.w*cp1a.x, ff.w*cp1a.y, ff.w*cp1a.z);
+        float3 d = make_float3(ff.y*internalF0.x - ff.z*internalF3.x,
+                               ff.y*internalF0.y - ff.z*internalF3.y,
+                               ff.y*internalF0.z - ff.z*internalF3.z);
+        unsigned int offsetA = atoms1.x+atoms3.x*numAtoms;
+        unsigned int offsetB = atoms1.y+atoms3.y*numAtoms;
+        unsigned int offsetC = atoms1.z+atoms3.z*numAtoms;
+        unsigned int offsetD = atoms1.w+atoms3.w*numAtoms;
+        float4 forceA = forceBuffers[offsetA];
+        float4 forceB = forceBuffers[offsetB];
+        float4 forceC = forceBuffers[offsetC];
+        float4 forceD = forceBuffers[offsetD];
+        forceA.x += internalF0.x;
+        forceA.y += internalF0.y;
+        forceA.z += internalF0.z;
+        forceB.x += d.x-internalF0.x;
+        forceB.y += d.y-internalF0.y;
+        forceB.z += d.z-internalF0.z;
+        forceC.x += -d.x-internalF3.x;
+        forceC.y += -d.y-internalF3.y;
+        forceC.z += -d.z-internalF3.z;
+        forceD.x += internalF3.x;
+        forceD.y += internalF3.y;
+        forceD.z += internalF3.z;
+        forceBuffers[offsetA] = forceA;
+        forceBuffers[offsetB] = forceB;
+        forceBuffers[offsetC] = forceC;
+        forceBuffers[offsetD] = forceD;
+        // Apply the force to the second torsion.
+        normCross1 = DOT3(cp0b, cp0b);
+        normSqrBC = DOT3(v1b, v1b);
+        normBC = sqrt(normSqrBC);
+        normCross2 = DOT3(cp1b, cp1b);
+        dp = 1.0f/normSqrBC;
+        ff = make_float4((-dEdB*normBC)/normCross1, DOT3(v0b, v1b)*dp, DOT3(v2b, v1b)*dp, (dEdB*normBC)/normCross2);
+        internalF0 = make_float3(ff.x*cp0b.x, ff.x*cp0b.y, ff.x*cp0b.z);
+        internalF3 = make_float3(ff.w*cp1b.x, ff.w*cp1b.y, ff.w*cp1b.z);
+        d = make_float3(ff.y*internalF0.x - ff.z*internalF3.x,
+                        ff.y*internalF0.y - ff.z*internalF3.y,
+                        ff.y*internalF0.z - ff.z*internalF3.z);
+        offsetA = atoms2.x+atoms4.x*numAtoms;
+        offsetB = atoms2.y+atoms4.y*numAtoms;
+        offsetC = atoms2.z+atoms4.z*numAtoms;
+        offsetD = atoms2.w+atoms4.w*numAtoms;
+        forceA = forceBuffers[offsetA];
+        forceB = forceBuffers[offsetB];
+        forceC = forceBuffers[offsetC];
+        forceD = forceBuffers[offsetD];
+        forceA.x += internalF0.x;
+        forceA.y += internalF0.y;
+        forceA.z += internalF0.z;
+        forceB.x += d.x-internalF0.x;
+        forceB.y += d.y-internalF0.y;
+        forceB.z += d.z-internalF0.z;
+        forceC.x += -d.x-internalF3.x;
+        forceC.y += -d.y-internalF3.y;
+        forceC.z += -d.z-internalF3.z;
+        forceD.x += internalF3.x;
+        forceD.y += internalF3.y;
+        forceD.z += internalF3.z;
+        forceBuffers[offsetA] = forceA;
+        forceBuffers[offsetB] = forceB;
+        forceBuffers[offsetC] = forceC;
+        forceBuffers[offsetD] = forceD;
+    }
+    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
+}
+void kCalculateCMAPTorsionForces(gpuContext gpu, CUDAStream<float4>& coefficients, CUDAStream<int2>& mapPositions, CUDAStream<int4>& torsionIndices, CUDAStream<int>& torsionMaps)
+{
+    kCalculateCMAPTorsionForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block>>>(gpu->sim.stride,
+            torsionMaps._length, gpu->sim.pForce4, gpu->sim.pEnergy, gpu->sim.pPosq, coefficients._pDevData,
+            mapPositions._pDevData, torsionIndices._pDevData, torsionMaps._pDevData);
+    LAUNCHERROR("kCalculateCMAPTorsionForces");
+}
--- a/platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
+++ b/platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+/**
+ * This tests the Cuda implementation of CMAPTorsionForce.
+ */
+#include "../../../tests/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "CudaPlatform.h"
+#include "openmm/CMAPTorsionForce.h"
+#include "openmm/PeriodicTorsionForce.h"
+#include "openmm/System.h"
+#include "openmm/VerletIntegrator.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include "sfmt/SFMT.h"
+#include <iostream>
+#include <vector>
+using namespace OpenMM;
+using namespace std;
+const double TOL = 1e-5;
+void testCMAPTorsions() {
+    const int mapSize = 36;
+    // Create two systems: one with a pair of periodic torsions, and one with a CMAP torsion
+    // that approximates the same force.
+    CudaPlatform platform;
+    System system1;
+    for (int i = 0; i < 5; i++)
+        system1.addParticle(1.0);
+    PeriodicTorsionForce* periodic = new PeriodicTorsionForce();
+    periodic->addTorsion(0, 1, 2, 3, 2, M_PI/4, 1.5);
+    periodic->addTorsion(1, 2, 3, 4, 3, M_PI/3, 2.0);
+    system1.addForce(periodic);
+    System system2;
+    for (int i = 0; i < 5; i++)
+        system2.addParticle(1.0);
+    CMAPTorsionForce* cmap = new CMAPTorsionForce();
+    vector<double> mapEnergy(mapSize*mapSize);
+    for (int i = 0; i < mapSize; i++) {
+        double angle1 = i*2*M_PI/mapSize;
+        double energy1 = 1.5*(1+cos(2*angle1-M_PI/4));
+        for (int j = 0; j < mapSize; j++) {
+            double angle2 = j*2*M_PI/mapSize;
+            double energy2 = 2.0*(1+cos(3*angle2-M_PI/3));
+            mapEnergy[i+j*mapSize] = energy1+energy2;
+        }
+    }
+    cmap->addMap(mapSize, mapEnergy);
+    cmap->addTorsion(0, 0, 1, 2, 3, 1, 2, 3, 4);
+    system2.addForce(cmap);
+    // Set the atoms in various positions, and verify that both systems give equal forces and energy.
+    OpenMM_SFMT::SFMT sfmt;
+    init_gen_rand(0, sfmt);
+    vector<Vec3> positions(5);
+    VerletIntegrator integrator1(0.01);
+    VerletIntegrator integrator2(0.01);
+    vector<Vec3> f1, f2;
+    double energy1, energy2;
+    for (int i = 0; i < 10; i++) {
+        for (int j = 0; j < (int) positions.size(); j++)
+            positions[j] = Vec3(5.0*genrand_real2(sfmt), 5.0*genrand_real2(sfmt), 5.0*genrand_real2(sfmt));
+        {
+            Context context(system1, integrator1, platform);
+            context.setPositions(positions);
+            State state = context.getState(State::Forces | State::Energy);
+            f1 = state.getForces();
+            energy1 = state.getPotentialEnergy();
+        }
+        {
+            Context context(system2, integrator2, platform);
+            context.setPositions(positions);
+            State state = context.getState(State::Forces | State::Energy);
+            f2 = state.getForces();
+            energy2 = state.getPotentialEnergy();
+        }
+        for (int i = 0; i < system1.getNumParticles(); i++)
+            ASSERT_EQUAL_VEC(f1[i], f2[i], 0.05);
+        ASSERT_EQUAL_TOL(energy1, energy2, 1e-3);
+    }
+}
+int main() {
+    try {
+        testCMAPTorsions();
+    }
+    catch(const exception& e) {
+        cout << "exception: " << e.what() << endl;
+        return 1;
+    }
+    cout << "Done" << endl;
+    return 0;
+}