Created CUDA implementation of CustomExternalForce

77d3b647 · Peter Eastman · 20b02641 · 77d3b647 · 77d3b647 · 77d3b647
Commit 77d3b647 authored Nov 17, 2009 by Peter Eastman
11 changed files
--- a/platforms/cuda/src/CudaKernelFactory.cpp
+++ b/platforms/cuda/src/CudaKernelFactory.cpp
@@ -55,6 +55,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
        return new CudaCalcGBSAOBCForceKernel(name, platform, data);
    if (name == CalcGBVIForceKernel::Name())
        return new CudaCalcGBVIForceKernel(name, platform, data);
+    if (name == CalcCustomExternalForceKernel::Name())
+        return new CudaCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
    if (name == IntegrateVerletStepKernel::Name())
        return new CudaIntegrateVerletStepKernel(name, platform, data);
    if (name == IntegrateLangevinStepKernel::Name())

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -593,6 +593,52 @@ double CudaCalcGBVIForceKernel::executeEnergy(ContextImpl& context) {
    return 0.0;
 }

+CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
+}
+
+void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
+    numParticles = force.getNumParticles();
+    vector<int> particle(numParticles);
+    vector<vector<double> > params(numParticles);
+    for (int i = 0; i < numParticles; i++)
+        force.getParticleParameters(i, particle[i], params[i]);
+    vector<string> paramNames;
+    for (int i = 0; i < force.getNumPerParticleParameters(); i++)
+        paramNames.push_back(force.getPerParticleParameterName(i));
+    globalParamNames.resize(force.getNumGlobalParameters());
+    globalParamValues.resize(force.getNumGlobalParameters());
+    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
+        globalParamNames[i] = force.getGlobalParameterName(i);
+        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
+    }
+    gpuSetCustomExternalParameters(data.gpu, particle, params, force.getEnergyFunction(), paramNames, globalParamNames);
+    if (globalParamValues.size() > 0)
+        SetCustomExternalGlobalParams(&globalParamValues[0]);
+}
+
+void CudaCalcCustomExternalForceKernel::executeForces(ContextImpl& context) {
+    updateGlobalParams(context);
+    kCalculateCustomExternalForces(data.gpu);
+}
+
+double CudaCalcCustomExternalForceKernel::executeEnergy(ContextImpl& context) {
+    updateGlobalParams(context);
+    kCalculateCustomExternalForces(data.gpu);
+    return 0.0;
+}
+
+void CudaCalcCustomExternalForceKernel::updateGlobalParams(ContextImpl& context) {
+    bool changed = false;
+    for (int i = 0; i < (int) globalParamNames.size(); i++) {
+        float value = (float) context.getParameter(globalParamNames[i]);
+        if (value != globalParamValues[i])
+            changed = true;
+        globalParamValues[i] = value;
+    }
+    if (changed)
+        SetCustomExternalGlobalParams(&globalParamValues[0]);
+}
+
 static void initializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator) {

    // Initialize any terms that haven't already been handled by a Force.

--- a/platforms/cuda/src/CudaKernels.h
+++ b/platforms/cuda/src/CudaKernels.h
@@ -460,6 +460,44 @@ private:
    CudaPlatform::PlatformData& data;
 };

+/**
+ * This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system.
+ */
+class CudaCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
+public:
+    CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) : CalcCustomExternalForceKernel(name, platform),
+            data(data), system(system) {
+    }
+    ~CudaCalcCustomExternalForceKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param force      the CustomExternalForce this kernel will be used for
+     */
+    void initialize(const System& system, const CustomExternalForce& force);
+    /**
+     * Execute the kernel to calculate the forces.
+     *
+     * @param context    the context in which to execute this kernel
+     */
+    void executeForces(ContextImpl& context);
+    /**
+     * Execute the kernel to calculate the energy.
+     *
+     * @param context    the context in which to execute this kernel
+     * @return the potential energy due to the CustomExternalForce
+     */
+    double executeEnergy(ContextImpl& context);
+private:
+    void updateGlobalParams(ContextImpl& context);
+    int numParticles;
+    CudaPlatform::PlatformData& data;
+    std::vector<std::string> globalParamNames;
+    std::vector<float> globalParamValues;
+    System& system;
+};
+
 /**
 * This kernel is invoked by VerletIntegrator to take one time step.
 */

--- a/platforms/cuda/src/CudaPlatform.cpp
+++ b/platforms/cuda/src/CudaPlatform.cpp
@@ -56,7 +56,8 @@ CudaPlatform::CudaPlatform() {
    registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
-    registerKernelFactory(CudaCalcGBVIForceKernel::Name(), factory);
+    registerKernelFactory(CalcGBVIForceKernel::Name(), factory);
+    registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);

--- a/platforms/cuda/src/kernels/cudaKernels.h
+++ b/platforms/cuda/src/kernels/cudaKernels.h
@@ -43,6 +43,7 @@ extern void kCalculateCDLJObcGbsaForces1(gpuContext gpu);
 extern void kCalculateCDLJGBVIForces1(gpuContext gpu);
 extern void kCalculateCDLJForces(gpuContext gpu);
 extern void kCalculateCustomBondForces(gpuContext gpu);
+extern void kCalculateCustomExternalForces(gpuContext gpu);
 extern void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid);
 extern void kReduceObcGbsaBornForces(gpuContext gpu);
 extern void kCalculateObcGbsaForces2(gpuContext gpu);
@@ -76,6 +77,8 @@ extern void SetCalculateCDLJForcesSim(gpuContext gpu);
 extern void GetCalculateCDLJForcesSim(gpuContext gpu);
 extern void SetCalculateCustomBondForcesSim(gpuContext gpu);
 extern void GetCalculateCustomBondForcesSim(gpuContext gpu);
+extern void SetCalculateCustomExternalForcesSim(gpuContext gpu);
+extern void GetCalculateCustomExternalForcesSim(gpuContext gpu);
 extern void SetCalculateCustomNonbondedForcesSim(gpuContext gpu);
 extern void GetCalculateCustomNonbondedForcesSim(gpuContext gpu);
 extern void SetCalculateLocalForcesSim(gpuContext gpu);
@@ -111,6 +114,9 @@ extern void GetRandomSim(gpuContext gpu);
 extern void SetCustomBondForceExpression(const Expression<128>& expression);
 extern void SetCustomBondEnergyExpression(const Expression<128>& expression);
 extern void SetCustomBondGlobalParams(float* paramValues);
+extern void SetCustomExternalForceExpressions(const Expression<128>& expressionX, const Expression<128>& expressionY, const Expression<128>& expressionZ);
+extern void SetCustomExternalEnergyExpression(const Expression<128>& expression);
+extern void SetCustomExternalGlobalParams(float* paramValues);
 extern void SetCustomNonbondedForceExpression(const Expression<128>& expression);
 extern void SetCustomNonbondedEnergyExpression(const Expression<128>& expression);
 extern void SetCustomNonbondedGlobalParams(float* paramValues);
--- a/platforms/cuda/src/kernels/cudatypes.h
+++ b/platforms/cuda/src/kernels/cudatypes.h
@@ -360,6 +360,10 @@ struct cudaGmxSimulation {
    float4*         pCustomBondParams;              // Parameters for custom bonds
    unsigned int    customBonds;                    // Number of custom bonds
    unsigned int    customBondParameters;           // Number of parameters for custom bonds
+    int*            pCustomExternalID;              // Atom indices for custom external force
+    float4*         pCustomExternalParams;          // Parameters for custom external force
+    unsigned int    customExternals;                // Number of particles for custom external force
+    unsigned int    customExternalParameters;       // Number of parameters for custom external force
    float4*         pTabulatedFunctionCoefficients[MAX_TABULATED_FUNCTIONS]; // The spline coefficients for each tabulated function
    float4*         pTabulatedFunctionParams;       // The min, max, and spacing for each tabulated function
    float2*         pEwaldCosSinSum;                // Pointer to the cos/sin sums (ewald)

--- a/platforms/cuda/src/kernels/gpu.cpp
+++ b/platforms/cuda/src/kernels/gpu.cpp
@@ -693,6 +693,50 @@ void gpuSetCustomBondParameters(gpuContext gpu, const vector<int>& bondAtom1, co
    SetCustomBondForceExpression(createExpression<128>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
 }

+extern "C"
+void gpuSetCustomExternalParameters(gpuContext gpu, const vector<int>& atomIndex, const vector<vector<double> >& atomParams,
+            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
+{
+    if (paramNames.size() > 4)
+        throw OpenMMException("CudaPlatform only supports four per-particle parameters for custom external forces");
+    if (globalParamNames.size() > 8)
+        throw OpenMMException("CudaPlatform only supports eight global parameters for custom external forces");
+    if (gpu->psCustomExternalID != NULL)
+        throw OpenMMException("CudaPlatform only supports a single CustomExternalForce per System");
+    gpu->sim.customExternals = atomIndex.size();
+    gpu->sim.customExternalParameters = paramNames.size();
+    gpu->psCustomExternalID = new CUDAStream<int>(gpu->sim.customExternals, 1, "CustomExternalId");
+    gpu->sim.pCustomExternalID = gpu->psCustomExternalID->_pDevData;
+    gpu->psCustomExternalParams = new CUDAStream<float4>(gpu->sim.customExternals, 1, "CustomExternalParams");
+    gpu->sim.pCustomExternalParams = gpu->psCustomExternalParams->_pDevData;
+    for (int i = 0; i < (int) atomIndex.size(); i++) {
+        (*gpu->psCustomExternalID)[i] = atomIndex[i];
+        if (atomParams[i].size() > 0)
+            (*gpu->psCustomExternalParams)[i].x = atomParams[i][0];
+        if (atomParams[i].size() > 1)
+            (*gpu->psCustomExternalParams)[i].y = atomParams[i][1];
+        if (atomParams[i].size() > 2)
+            (*gpu->psCustomExternalParams)[i].z = atomParams[i][2];
+        if (atomParams[i].size() > 3)
+            (*gpu->psCustomExternalParams)[i].w = atomParams[i][3];
+    }
+    gpu->psCustomExternalID->Upload();
+    gpu->psCustomExternalParams->Upload();
+
+    // Create the Expressions.
+
+    vector<string> variables;
+    variables.push_back("x");
+    variables.push_back("y");
+    variables.push_back("z");
+    for (int i = 0; i < (int) paramNames.size(); i++)
+        variables.push_back(paramNames[i]);
+    SetCustomExternalEnergyExpression(createExpression<128>(gpu, energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
+    SetCustomExternalForceExpressions(createExpression<128>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("x").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize),
+                                  createExpression<128>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("y").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize),
+                                  createExpression<128>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("z").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
+}
+
 extern "C"
 void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double> >& parameters, const vector<vector<int> >& exclusions,
            CudaNonbondedMethod method, float cutoffDistance, const string& energyExp,
@@ -1781,6 +1825,8 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
    gpu->psCustomExceptionParams    = NULL;
    gpu->psCustomBondID             = NULL;
    gpu->psCustomBondParams         = NULL;
+    gpu->psCustomExternalID         = NULL;
+    gpu->psCustomExternalParams     = NULL;
    gpu->psEwaldCosSinSum           = NULL;
    gpu->psTabulatedErfc            = NULL;
    gpu->psPmeGrid                  = NULL;
@@ -1952,6 +1998,10 @@ void gpuShutDown(gpuContext gpu)
        delete gpu->psCustomBondID;
        delete gpu->psCustomBondParams;
    }
+    if (gpu->psCustomExternalParams != NULL) {
+        delete gpu->psCustomExternalID;
+        delete gpu->psCustomExternalParams;
+    }
    if (gpu->psEwaldCosSinSum != NULL)
        delete gpu->psEwaldCosSinSum;
    if (gpu->psPmeGrid != NULL) {
@@ -2318,6 +2368,7 @@ int gpuSetConstants(gpuContext gpu)
    SetCalculateCDLJObcGbsaForces1Sim(gpu);
    SetCalculateCustomNonbondedForcesSim(gpu);
    SetCalculateCustomBondForcesSim(gpu);
+    SetCalculateCustomExternalForcesSim(gpu);
    SetCalculateLocalForcesSim(gpu);
    SetCalculateObcGbsaBornSumSim(gpu);
    SetCalculateGBVIBornSumSim(gpu);

--- a/platforms/cuda/src/kernels/gputypes.h
+++ b/platforms/cuda/src/kernels/gputypes.h
@@ -106,6 +106,8 @@ struct _gpuContext {
    CUDAStream<float4>* psCustomExceptionParams; // Parameters for custom nonbonded exceptions
    CUDAStream<int4>* psCustomBondID;             // Atom indices for custom bonds
    CUDAStream<float4>* psCustomBondParams;       // Parameters for custom bonds
+    CUDAStream<int>* psCustomExternalID;          // Atom indices for custom external force
+    CUDAStream<float4>* psCustomExternalParams;   // Parameters for custom external force
    CUDAStream<float4>* psTabulatedFunctionParams; // The min, max, and spacing for each tabulated function
    CUDAStream<float2>* psEwaldCosSinSum;
    CUDAStream<float>* psTabulatedErfc;     // Tabulated values for erfc()
@@ -212,6 +214,10 @@ extern "C"
 void gpuSetCustomBondParameters(gpuContext gpu, const std::vector<int>& bondAtom1, const std::vector<int>& bondAtom2, const std::vector<std::vector<double> >& bondParams,
            const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);

+extern "C"
+void gpuSetCustomExternalParameters(gpuContext gpu, const std::vector<int>& atomIndex, const std::vector<std::vector<double> >& atomParams,
+            const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
+
 extern "C"
 void gpuSetCustomNonbondedParameters(gpuContext gpu, const std::vector<std::vector<double> >& parameters, const std::vector<std::vector<int> >& exclusions,
            CudaNonbondedMethod method, float cutoffDistance, const std::string& energyExp,

--- a/platforms/cuda/src/kernels/kCalculateCustomExternalForces.cu
+++ b/platforms/cuda/src/kernels/kCalculateCustomExternalForces.cu
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Scott Le Grand, Peter Eastman                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include <stdio.h>
+#include <cuda.h>
+#include <vector_functions.h>
+#include <cstdlib>
+#include <string>
+#include <iostream>
+#include <fstream>
+using namespace std;
+
+#include "gputypes.h"
+#include "cudatypes.h"
+
+static __constant__ cudaGmxSimulation cSim;
+static __constant__ Expression<128> forceExpX;
+static __constant__ Expression<128> forceExpY;
+static __constant__ Expression<128> forceExpZ;
+static __constant__ Expression<128> energyExp;
+
+#include "kEvaluateExpression.h"
+
+void SetCalculateCustomExternalForcesSim(gpuContext gpu)
+{
+    cudaError_t status;
+    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
+    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
+}
+
+void GetCalculateCustomExternalForcesSim(gpuContext gpu)
+{
+    cudaError_t status;
+    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
+    RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
+}
+
+void SetCustomExternalForceExpressions(const Expression<128>& expressionX, const Expression<128>& expressionY, const Expression<128>& expressionZ)
+{
+    cudaError_t status;
+    status = cudaMemcpyToSymbol(forceExpX, &expressionX, sizeof(forceExpX));
+    status = cudaMemcpyToSymbol(forceExpY, &expressionY, sizeof(forceExpY));
+    status = cudaMemcpyToSymbol(forceExpZ, &expressionZ, sizeof(forceExpZ));
+    RTERROR(status, "SetCustomExternalForceExpression: cudaMemcpyToSymbol failed");
+}
+
+void SetCustomExternalEnergyExpression(const Expression<128>& expression)
+{
+    cudaError_t status;
+    status = cudaMemcpyToSymbol(energyExp, &expression, sizeof(energyExp));
+    RTERROR(status, "SetCustomExternalEnergyExpression: cudaMemcpyToSymbol failed");
+}
+
+void SetCustomExternalGlobalParams(float* paramValues)
+{
+    cudaError_t status;
+    status = cudaMemcpyToSymbol(globalParams, paramValues, sizeof(globalParams));
+    RTERROR(status, "SetCustomExternalGlobalParams: cudaMemcpyToSymbol failed");
+}
+
+
+__global__ void kCalculateCustomExternalForces_kernel()
+{
+    extern __shared__ float stack[];
+    float* variables = (float*) &stack[cSim.customExpressionStackSize*blockDim.x];
+    unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
+    float totalEnergy = 0.0f;
+
+    while (index < cSim.customExternals)
+    {
+        int atom        = cSim.pCustomExternalID[index];
+        float4 params   = cSim.pCustomExternalParams[index];
+        float4 pos      = cSim.pPosq[atom];
+        VARIABLE(0)     = pos.x;
+        VARIABLE(1)     = pos.y;
+        VARIABLE(2)     = pos.z;
+        VARIABLE(3)     = params.x;
+        VARIABLE(4)     = params.y;
+        VARIABLE(5)     = params.z;
+        VARIABLE(6)     = params.w;
+        totalEnergy       += kEvaluateExpression_kernel(&energyExp, stack, variables);;
+        float4 force       = cSim.pForce4[atom];
+        force.x           -= kEvaluateExpression_kernel(&forceExpX, stack, variables);
+        force.y           -= kEvaluateExpression_kernel(&forceExpY, stack, variables);
+        force.z           -= kEvaluateExpression_kernel(&forceExpZ, stack, variables);
+        cSim.pForce4[atom] = force;
+        index             += blockDim.x * gridDim.x;
+    }
+    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy;
+}
+
+void kCalculateCustomExternalForces(gpuContext gpu)
+{
+//    printf("kCalculateCustomExternalForces\n");
+    kCalculateCustomExternalForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block,
+            gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.localForces_threads_per_block>>>();
+    LAUNCHERROR("kCalculateCustomExternalForces");
+}
--- a/platforms/cuda/tests/TestCudaCustomExternalForce.cpp
+++ b/platforms/cuda/tests/TestCudaCustomExternalForce.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the Cuda implementation of CustomExternalForce.
+ */
+
+#include "../../../tests/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "CudaPlatform.h"
+#include "openmm/CustomExternalForce.h"
+#include "openmm/System.h"
+#include "openmm/VerletIntegrator.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+const double TOL = 1e-5;
+
+void testForce() {
+    CudaPlatform platform;
+    System system;
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    VerletIntegrator integrator(0.01);
+    CustomExternalForce* forceField = new CustomExternalForce("scale*(x+yscale*(y-y0)^2)");
+    forceField->addPerParticleParameter("y0");
+    forceField->addPerParticleParameter("yscale");
+    forceField->addGlobalParameter("scale", 0.5);
+    vector<double> parameters(2);
+    parameters[0] = 0.5;
+    parameters[1] = 2.0;
+    forceField->addParticle(0, parameters);
+    parameters[0] = 1.5;
+    parameters[1] = 3.0;
+    forceField->addParticle(2, parameters);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(3);
+    positions[0] = Vec3(0, 2, 0);
+    positions[1] = Vec3(0, 0, 1);
+    positions[2] = Vec3(1, 0, 1);
+    context.setPositions(positions);
+    State state = context.getState(State::Forces | State::Energy);
+    const vector<Vec3>& forces = state.getForces();
+    ASSERT_EQUAL_VEC(Vec3(-0.5, -0.5*2.0*2.0*1.5, 0), forces[0], TOL);
+    ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
+    ASSERT_EQUAL_VEC(Vec3(-0.5, 0.5*3.0*2.0*1.5, 0), forces[2], TOL);
+    ASSERT_EQUAL_TOL(0.5*(1.0 + 2.0*1.5*1.5 + 3.0*1.5*1.5), state.getPotentialEnergy(), TOL);
+}
+
+int main() {
+    try {
+        testForce();
+    }
+    catch(const exception& e) {
+        cout << "exception: " << e.what() << endl;
+        return 1;
+    }
+    cout << "Done" << endl;
+    return 0;
+}
--- a/platforms/opencl/src/kernels/customNonbondedExceptions.cl
+++ b/platforms/opencl/src/kernels/customNonbondedExceptions.cl
-/**
- * Compute custom nonbonded exceptions.
- */
-
-__kernel void computeCustomNonbondedExceptions(int numAtoms, int numExceptions, __global float4* forceBuffers, __global float* energyBuffer,
-        __global float4* posq, __global float4* params, __global int4* indices
-        EXTRA_ARGUMENTS) {
-    int index = get_global_id(0);
-    float energy = 0.0f;
-    while (index < numExceptions) {
-        // Look up the data for this exception.
-
-        int4 atoms = indices[index];
-        float4 exceptionParams = params[index];
-        float4 delta = posq[atoms.y]-posq[atoms.x];
-#ifdef USE_PERIODIC
-        delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
-        delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
-        delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
-#endif
-        // Compute the force.
-
-        float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
-#ifdef USE_CUTOFF
-        if (r2 > CUTOFF_SQUARED) {
-#else
-        {
-#endif
-            float r = sqrt(r2);
-            float dEdR;
-            COMPUTE_FORCE
-            delta.xyz *= -dEdR/r;
-
-            // Record the force on each of the two atoms.
-
-            unsigned int offsetA = atoms.x+atoms.z*numAtoms;
-            unsigned int offsetB = atoms.y+atoms.w*numAtoms;
-            float4 forceA = forceBuffers[offsetA];
-            float4 forceB = forceBuffers[offsetB];
-            forceA.xyz -= delta.xyz;
-            forceB.xyz += delta.xyz;
-            forceBuffers[offsetA] = forceA;
-            forceBuffers[offsetB] = forceB;
-        }
-        index += get_global_size(0);
-    }
-    energyBuffer[get_global_id(0)] += energy;
-}