Commit 5a06df78 authored by tic20's avatar tic20
Browse files
parents 8dd60914 a9223eea
#ifndef OPENMM_OPENCLDRUDEKERNELSOURCES_H_
#define OPENMM_OPENCLDRUDEKERNELSOURCES_H_
#ifndef OPENMM_COMMONDRUDEKERNELSOURCES_H_
#define OPENMM_COMMONDRUDEKERNELSOURCES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
......@@ -32,16 +32,16 @@
namespace OpenMM {
/**
* This class is a central holding place for the source code of OpenCL kernels.
* The CMake build script inserts declarations into it based on the .cu files in the
* This class is a central holding place for the source code of device kernels.
* The CMake build script inserts declarations into it based on the .cc files in the
* kernels subfolder.
*/
// Holder class for kernel source declarations. The body below consists only of
// build-time placeholders; per the header comment, the CMake build script
// replaces them with declarations derived from the files in the kernels subfolder.
class OpenCLDrudeKernelSources {
class CommonDrudeKernelSources {
public:
// Build-time placeholders follow (do not spell the placeholder tokens in comments,
// since the build script would presumably substitute them there as well).
@CL_FILE_DECLARATIONS@
@KERNEL_FILE_DECLARATIONS@
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLDRUDEKERNELSOURCES_H_*/
#endif /*OPENMM_COMMONDRUDEKERNELSOURCES_H_*/
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Portions copyright (c) 2013-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -29,29 +29,22 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLDrudeKernels.h"
#include "OpenCLDrudeKernelSources.h"
#include "CommonDrudeKernels.h"
#include "CommonDrudeKernelSources.h"
#include "openmm/internal/ContextImpl.h"
#include "OpenCLBondedUtilities.h"
#include "OpenCLForceInfo.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLKernelSources.h"
#include "openmm/common/BondedUtilities.h"
#include "openmm/common/ComputeForceInfo.h"
#include "openmm/common/IntegrationUtilities.h"
#include "CommonKernelSources.h"
#include "SimTKOpenMMRealType.h"
#include <set>
using namespace OpenMM;
using namespace std;
/**
 * Bind the position-correction argument of a kernel.
 *
 * When the context reports mixed precision, the posq correction device buffer
 * is bound to the argument slot; otherwise a null pointer is bound instead.
 *
 * @param cl      the context queried for the precision mode and correction buffer
 * @param kernel  the kernel whose argument is being set
 * @param index   the argument slot to set
 */
static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    if (!cl.getUseMixedPrecision()) {
        // No correction buffer is passed in this mode: bind a null pointer.
        kernel.setArg<void*>(index, NULL);
        return;
    }
    kernel.setArg<cl::Buffer>(index, cl.getPosqCorrection().getDeviceBuffer());
}
class OpenCLDrudeForceInfo : public OpenCLForceInfo {
class CommonDrudeForceInfo : public ComputeForceInfo {
public:
OpenCLDrudeForceInfo(const DrudeForce& force) : OpenCLForceInfo(0), force(force) {
CommonDrudeForceInfo(const DrudeForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumParticles()+force.getNumScreenedPairs();
......@@ -107,15 +100,16 @@ private:
const DrudeForce& force;
};
void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
if (cl.getContextIndex() != 0)
void CommonCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
cc.setAsCurrent();
if (cc.getContextIndex() != 0)
return; // This is run entirely on one device
int numParticles = force.getNumParticles();
if (numParticles > 0) {
// Create the harmonic interaction.
vector<vector<int> > atoms(numParticles, vector<int>(5));
particleParams.initialize<mm_float4>(cl, numParticles, "drudeParticleParams");
particleParams.initialize<mm_float4>(cc, numParticles, "drudeParticleParams");
vector<mm_float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) {
double charge, polarizability, aniso12, aniso34;
......@@ -139,15 +133,15 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
}
particleParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams.getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
replacements["PARAMS"] = cc.getBondedUtilities().addArgument(particleParams, "float4");
cc.getBondedUtilities().addInteraction(atoms, cc.replaceStrings(CommonDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
}
int numPairs = force.getNumScreenedPairs();
if (numPairs > 0) {
// Create the screened interaction between dipole pairs.
vector<vector<int> > atoms(numPairs, vector<int>(4));
pairParams.initialize<mm_float2>(cl, numPairs, "drudePairParams");
pairParams.initialize<mm_float2>(cc, numPairs, "drudePairParams");
vector<mm_float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) {
int drude1, drude2;
......@@ -163,18 +157,18 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
}
pairParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams.getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
replacements["PARAMS"] = cc.getBondedUtilities().addArgument(pairParams, "float2");
cc.getBondedUtilities().addInteraction(atoms, cc.replaceStrings(CommonDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
}
cl.addForce(new OpenCLDrudeForceInfo(force));
cc.addForce(new CommonDrudeForceInfo(force));
}
// Execute the kernel. This method performs no computation of its own and always
// returns 0.0, ignoring all of its arguments. initialize() registers the Drude
// interactions with the context's bonded utilities, so presumably the forces and
// energy are evaluated there rather than here (NOTE(review): confirm).
double OpenCLCalcDrudeForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CommonCalcDrudeForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, const DrudeForce& force) {
if (cl.getContextIndex() != 0)
void CommonCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, const DrudeForce& force) {
if (cc.getContextIndex() != 0)
return; // This is run entirely on one device
// Set the particle parameters.
......@@ -226,9 +220,9 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
}
}
void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
void CommonIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
cc.initializeContexts();
cc.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Identify particle pairs and ordinary particles.
......@@ -246,8 +240,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
pairParticleVec.push_back(mm_int2(p, p1));
}
normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
normalParticles.initialize<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles.initialize<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles");
normalParticles.initialize<int>(cc, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
pairParticles.initialize<mm_int2>(cc, max((int) pairParticleVec.size(), 1), "drudePairParticles");
if (normalParticleVec.size() > 0)
normalParticles.upload(normalParticleVec);
if (pairParticleVec.size() > 0)
......@@ -256,61 +250,67 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
// Create kernels.
map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
defines["NUM_NORMAL_PARTICLES"] = cl.intToString(normalParticleVec.size());
defines["NUM_PAIRS"] = cl.intToString(pairParticleVec.size());
defines["NUM_ATOMS"] = cc.intToString(cc.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cc.intToString(cc.getPaddedNumAtoms());
defines["NUM_NORMAL_PARTICLES"] = cc.intToString(normalParticleVec.size());
defines["NUM_PAIRS"] = cc.intToString(pairParticleVec.size());
map<string, string> replacements;
cl::Program program = cl.createProgram(OpenCLDrudeKernelSources::drudeLangevin, defines, "");
kernel1 = cl::Kernel(program, "integrateDrudeLangevinPart1");
kernel2 = cl::Kernel(program, "integrateDrudeLangevinPart2");
hardwallKernel = cl::Kernel(program, "applyHardWallConstraints");
ComputeProgram program = cc.compileProgram(CommonDrudeKernelSources::drudeLangevin, defines);
kernel1 = program->createKernel("integrateDrudeLangevinPart1");
kernel2 = program->createKernel("integrateDrudeLangevinPart2");
hardwallKernel = program->createKernel("applyHardWallConstraints");
prevStepSize = -1.0;
}
void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
int numAtoms = cl.getNumAtoms();
void CommonIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
cc.setAsCurrent();
IntegrationUtilities& integration = cc.getIntegrationUtilities();
int numAtoms = cc.getNumAtoms();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, normalParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(4, pairParticles.getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(12, integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
if (cl.getUseMixedPrecision())
kernel2.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
kernel1->addArg(cc.getVelm());
kernel1->addArg(cc.getLongForceBuffer());
kernel1->addArg(integration.getPosDelta());
kernel1->addArg(normalParticles);
kernel1->addArg(pairParticles);
kernel1->addArg(integration.getStepSize());
for (int i = 0; i < 6; i++)
kernel1->addArg();
kernel1->addArg(integration.getRandom());
kernel1->addArg();
kernel2->addArg(cc.getPosq());
if (cc.getUseMixedPrecision())
kernel2->addArg(cc.getPosqCorrection());
else
kernel2.setArg<void*>(1, NULL);
kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
if (cl.getUseMixedPrecision())
hardwallKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
kernel2->addArg(NULL);
kernel2->addArg(integration.getPosDelta());
kernel2->addArg(cc.getVelm());
kernel2->addArg(integration.getStepSize());
hardwallKernel->addArg(cc.getPosq());
if (cc.getUseMixedPrecision())
hardwallKernel->addArg(cc.getPosqCorrection());
else
hardwallKernel.setArg<void*>(1, NULL);
hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(3, pairParticles.getDeviceBuffer());
hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
hardwallKernel->addArg(NULL);
hardwallKernel->addArg(cc.getVelm());
hardwallKernel->addArg(pairParticles);
hardwallKernel->addArg(integration.getStepSize());
hardwallKernel->addArg();
hardwallKernel->addArg();
}
// Compute integrator coefficients.
double stepSize = integrator.getStepSize();
double vscale = exp(-stepSize*integrator.getFriction());
double fscale = (1-vscale)/integrator.getFriction();
double fscale = (1-vscale)/integrator.getFriction()/(double) 0x100000000;
double noisescale = sqrt(2*BOLTZ*integrator.getTemperature()*integrator.getFriction())*sqrt(0.5*(1-vscale*vscale)/integrator.getFriction());
double vscaleDrude = exp(-stepSize*integrator.getDrudeFriction());
double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction();
double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction()/(double) 0x100000000;
double noisescaleDrude = sqrt(2*BOLTZ*integrator.getDrudeTemperature()*integrator.getDrudeFriction())*sqrt(0.5*(1-vscaleDrude*vscaleDrude)/integrator.getDrudeFriction());
double maxDrudeDistance = integrator.getMaxDrudeDistance();
double hardwallscaleDrude = sqrt(BOLTZ*integrator.getDrudeTemperature());
if (stepSize != prevStepSize) {
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) {
mm_double2 ss = mm_double2(0, stepSize);
integration.getStepSize().upload(&ss);
}
......@@ -320,31 +320,31 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
}
prevStepSize = stepSize;
}
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
kernel1.setArg<cl_double>(6, vscale);
kernel1.setArg<cl_double>(7, fscale);
kernel1.setArg<cl_double>(8, noisescale);
kernel1.setArg<cl_double>(9, vscaleDrude);
kernel1.setArg<cl_double>(10, fscaleDrude);
kernel1.setArg<cl_double>(11, noisescaleDrude);
hardwallKernel.setArg<cl_double>(5, maxDrudeDistance);
hardwallKernel.setArg<cl_double>(6, hardwallscaleDrude);
if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) {
kernel1->setArg(6, vscale);
kernel1->setArg(7, fscale);
kernel1->setArg(8, noisescale);
kernel1->setArg(9, vscaleDrude);
kernel1->setArg(10, fscaleDrude);
kernel1->setArg(11, noisescaleDrude);
hardwallKernel->setArg(5, maxDrudeDistance);
hardwallKernel->setArg(6, hardwallscaleDrude);
}
else {
kernel1.setArg<cl_float>(6, (cl_float) vscale);
kernel1.setArg<cl_float>(7, (cl_float) fscale);
kernel1.setArg<cl_float>(8, (cl_float) noisescale);
kernel1.setArg<cl_float>(9, (cl_float) vscaleDrude);
kernel1.setArg<cl_float>(10, (cl_float) fscaleDrude);
kernel1.setArg<cl_float>(11, (cl_float) noisescaleDrude);
hardwallKernel.setArg<cl_float>(5, (cl_float) maxDrudeDistance);
hardwallKernel.setArg<cl_float>(6, (cl_float) hardwallscaleDrude);
kernel1->setArg(6, (float) vscale);
kernel1->setArg(7, (float) fscale);
kernel1->setArg(8, (float) noisescale);
kernel1->setArg(9, (float) vscaleDrude);
kernel1->setArg(10, (float) fscaleDrude);
kernel1->setArg(11, (float) noisescaleDrude);
hardwallKernel->setArg(5, (float) maxDrudeDistance);
hardwallKernel->setArg(6, (float) hardwallscaleDrude);
}
// Call the first integration kernel.
kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize()));
cl.executeKernel(kernel1, numAtoms);
kernel1->setArg(13, integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize()));
kernel1->execute(numAtoms);
// Apply constraints.
......@@ -352,32 +352,33 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
// Call the second integration kernel.
cl.executeKernel(kernel2, numAtoms);
kernel2->execute(numAtoms);
// Apply hard wall constraints.
if (maxDrudeDistance > 0)
cl.executeKernel(hardwallKernel, pairParticles.getSize());
hardwallKernel->execute(pairParticles.getSize());
integration.computeVirtualSites();
// Update the time and step count.
cl.setTime(cl.getTime()+stepSize);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
cc.setTime(cc.getTime()+stepSize);
cc.setStepCount(cc.getStepCount()+1);
cc.reorderAtoms();
}
// Return the kinetic energy as computed by the context's integration utilities.
// Half of the integrator's current step size is passed as the argument; the
// context parameter is not used.
double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
double CommonIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
return cc.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
// Destructor: releases the minimizer position buffer with lbfgs_free() when it is
// non-NULL. The constructor initializes minimizerPos to NULL, so nothing is freed
// if the buffer was never set.
OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() {
CommonIntegrateDrudeSCFStepKernel::~CommonIntegrateDrudeSCFStepKernel() {
if (minimizerPos != NULL)
lbfgs_free(minimizerPos);
}
void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
cl.getPlatformData().initializeContexts(system);
void CommonIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
cc.initializeContexts();
cc.setAsCurrent();
// Identify Drude particles.
......@@ -398,49 +399,53 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D
// Create the kernels.
cl::Program program = cl.createProgram(OpenCLKernelSources::verlet, "");
kernel1 = cl::Kernel(program, "integrateVerletPart1");
kernel2 = cl::Kernel(program, "integrateVerletPart2");
ComputeProgram program = cc.compileProgram(CommonKernelSources::verlet);
kernel1 = program->createKernel("integrateVerletPart1");
kernel2 = program->createKernel("integrateVerletPart2");
prevStepSize = -1.0;
}
void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
int numAtoms = cl.getNumAtoms();
void CommonIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
cc.setAsCurrent();
IntegrationUtilities& integration = cc.getIntegrationUtilities();
int numAtoms = cc.getNumAtoms();
double dt = integrator.getStepSize();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
kernel1.setArg<cl_int>(0, numAtoms);
kernel1.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel1, 3);
kernel1.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(6, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl_int>(0, numAtoms);
kernel2.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kernel2, 3);
kernel2.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
kernel1->addArg(numAtoms);
kernel1->addArg(cc.getPaddedNumAtoms());
kernel1->addArg(cc.getIntegrationUtilities().getStepSize());
kernel1->addArg(cc.getPosq());
kernel1->addArg(cc.getVelm());
kernel1->addArg(cc.getLongForceBuffer());
kernel1->addArg(integration.getPosDelta());
if (cc.getUseMixedPrecision())
kernel1->addArg(cc.getPosqCorrection());
kernel2->addArg(numAtoms);
kernel2->addArg(cc.getIntegrationUtilities().getStepSize());
kernel2->addArg(cc.getPosq());
kernel2->addArg(cc.getVelm());
kernel2->addArg(integration.getPosDelta());
if (cc.getUseMixedPrecision())
kernel2->addArg(cc.getPosqCorrection());
}
if (dt != prevStepSize) {
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) {
vector<mm_double2> stepSizeVec(1);
stepSizeVec[0] = mm_double2(dt, dt);
cl.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
cc.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
}
else {
vector<mm_float2> stepSizeVec(1);
stepSizeVec[0] = mm_float2((cl_float) dt, (cl_float) dt);
cl.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
stepSizeVec[0] = mm_float2((float) dt, (float) dt);
cc.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
}
prevStepSize = dt;
}
// Call the first integration kernel.
cl.executeKernel(kernel1, numAtoms);
kernel1->execute(numAtoms);
// Apply constraints.
......@@ -448,7 +453,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud
// Call the second integration kernel.
cl.executeKernel(kernel2, numAtoms);
kernel2->execute(numAtoms);
// Update the positions of virtual sites and Drude particles.
......@@ -457,40 +462,40 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud
// Update the time and step count.
cl.setTime(cl.getTime()+dt);
cl.setStepCount(cl.getStepCount()+1);
cl.reorderAtoms();
cc.setTime(cc.getTime()+dt);
cc.setStepCount(cc.getStepCount()+1);
cc.reorderAtoms();
// Reduce UI lag.
#ifdef WIN32
cl.getQueue().flush();
cc.flushQueue();
#endif
}
// Return the kinetic energy as computed by the context's integration utilities.
// Half of the integrator's current step size is passed as the argument; the
// context parameter is not used.
double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
double CommonIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
return cc.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
}
// Bundle of state handed to the L-BFGS evaluate() callback. minimize() passes the
// address of one of these as the opaque instance pointer to lbfgs(), and evaluate()
// casts that pointer back to MinimizerData*. All members are references, so the
// struct owns nothing and must not outlive the objects it refers to.
struct MinimizerData {
// Context used by the callback to compute forces and energy.
ContextImpl& context;
OpenCLContext& cl;
// Compute context whose position, force and pinned buffers the callback accesses.
ComputeContext& cc;
// Indices of the Drude particles whose positions are being minimized.
vector<int>& drudeParticles;
MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& drudeParticles) : context(context), cl(cl), drudeParticles(drudeParticles) {}
MinimizerData(ContextImpl& context, ComputeContext& cc, vector<int>& drudeParticles) : context(context), cc(cc), drudeParticles(drudeParticles) {}
};
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
ContextImpl& context = data->context;
OpenCLContext& cl = data->cl;
ComputeContext& cc = data->cc;
vector<int>& drudeParticles = data->drudeParticles;
int numDrudeParticles = drudeParticles.size();
// Set the particle positions.
cl.getPosq().download(cl.getPinnedBuffer());
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cc.getPosq().download(cc.getPinnedBuffer());
if (cc.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4& p = posq[drudeParticles[i]];
p.x = x[3*i];
......@@ -499,7 +504,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
}
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
mm_float4* posq = (mm_float4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4& p = posq[drudeParticles[i]];
p.x = x[3*i];
......@@ -507,40 +512,31 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
p.z = x[3*i+2];
}
}
cl.getPosq().upload(cl.getPinnedBuffer());
cc.getPosq().upload(cc.getPinnedBuffer());
// Compute the forces and energy for this configuration.
double energy = context.calcForcesAndEnergy(true, true);
cl.getForce().download(cl.getPinnedBuffer());
if (cl.getUseDoublePrecision()) {
mm_double4* force = (mm_double4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = drudeParticles[i];
g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z;
}
}
else {
mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = drudeParticles[i];
g[3*i] = -force[index].x;
g[3*i+1] = -force[index].y;
g[3*i+2] = -force[index].z;
}
long long* force = (long long*) cc.getPinnedBuffer();
cc.getLongForceBuffer().download(force);
double forceScale = -1.0/0x100000000;
int paddedNumAtoms = cc.getPaddedNumAtoms();
for (int i = 0; i < numDrudeParticles; ++i) {
int index = drudeParticles[i];
g[3*i] = forceScale*force[index];
g[3*i+1] = forceScale*force[index+paddedNumAtoms];
g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
}
return energy;
}
void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
void CommonIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
// Record the initial positions.
int numDrudeParticles = drudeParticles.size();
cl.getPosq().download(cl.getPinnedBuffer());
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cc.getPosq().download(cc.getPinnedBuffer());
if (cc.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_double4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
......@@ -549,7 +545,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
}
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
mm_float4* posq = (mm_float4*) cc.getPinnedBuffer();
for (int i = 0; i < numDrudeParticles; ++i) {
mm_float4 p = posq[drudeParticles[i]];
minimizerPos[3*i] = p.x;
......@@ -571,6 +567,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
// Perform the minimization.
lbfgsfloatval_t fx;
MinimizerData data(context, cl, drudeParticles);
MinimizerData data(context, cc, drudeParticles);
lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
}
\ No newline at end of file
#ifndef OPENCL_DRUDE_KERNELS_H_
#define OPENCL_DRUDE_KERNELS_H_
#ifndef COMMON_DRUDE_KERNELS_H_
#define COMMON_DRUDE_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Portions copyright (c) 2013-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -33,8 +33,8 @@
* -------------------------------------------------------------------------- */
#include "openmm/DrudeKernels.h"
#include "OpenCLContext.h"
#include "OpenCLArray.h"
#include "openmm/common/ComputeContext.h"
#include "openmm/common/ComputeArray.h"
#include "lbfgs.h"
namespace OpenMM {
......@@ -42,10 +42,10 @@ namespace OpenMM {
/**
* This kernel is invoked by DrudeForce to calculate the forces acting on the system and the energy of the system.
*/
class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel {
class CommonCalcDrudeForceKernel : public CalcDrudeForceKernel {
public:
OpenCLCalcDrudeForceKernel(const std::string& name, const Platform& platform, OpenCLContext& cl) :
CalcDrudeForceKernel(name, platform), cl(cl) {
CommonCalcDrudeForceKernel(const std::string& name, const Platform& platform, ComputeContext& cc) :
CalcDrudeForceKernel(name, platform), cc(cc) {
}
/**
* Initialize the kernel.
......@@ -71,18 +71,18 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private:
OpenCLContext& cl;
OpenCLArray particleParams;
OpenCLArray pairParams;
ComputeContext& cc;
ComputeArray particleParams;
ComputeArray pairParams;
};
/**
* This kernel is invoked by DrudeLangevinIntegrator to take one time step.
*/
class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
class CommonIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public:
OpenCLIntegrateDrudeLangevinStepKernel(const std::string& name, const Platform& platform, OpenCLContext& cl) :
IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
CommonIntegrateDrudeLangevinStepKernel(const std::string& name, const Platform& platform, ComputeContext& cc) :
IntegrateDrudeLangevinStepKernel(name, platform), cc(cc), hasInitializedKernels(false) {
}
/**
* Initialize the kernel.
......@@ -107,23 +107,23 @@ public:
*/
double computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
private:
OpenCLContext& cl;
bool hasInitializedKernels;
ComputeContext& cc;
double prevStepSize;
OpenCLArray normalParticles;
OpenCLArray pairParticles;
cl::Kernel kernel1, kernel2, hardwallKernel;
bool hasInitializedKernels;
ComputeArray normalParticles;
ComputeArray pairParticles;
ComputeKernel kernel1, kernel2, hardwallKernel;
};
/**
* This kernel is invoked by DrudeSCFIntegrator to take one time step.
*/
class OpenCLIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
class CommonIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
public:
OpenCLIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, OpenCLContext& cl) :
IntegrateDrudeSCFStepKernel(name, platform), cl(cl), hasInitializedKernels(false), minimizerPos(NULL) {
CommonIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, ComputeContext& cc) :
IntegrateDrudeSCFStepKernel(name, platform), cc(cc), minimizerPos(NULL), hasInitializedKernels(false) {
}
~OpenCLIntegrateDrudeSCFStepKernel();
~CommonIntegrateDrudeSCFStepKernel();
/**
* Initialize the kernel.
*
......@@ -148,15 +148,15 @@ public:
double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private:
void minimize(ContextImpl& context, double tolerance);
OpenCLContext& cl;
bool hasInitializedKernels;
ComputeContext& cc;
double prevStepSize;
bool hasInitializedKernels;
std::vector<int> drudeParticles;
lbfgsfloatval_t *minimizerPos;
lbfgs_parameter_t minimizerParams;
cl::Kernel kernel1, kernel2;
ComputeKernel kernel1, kernel2;
};
} // namespace OpenMM
#endif /*OPENCL_DRUDE_KERNELS_H_*/
#endif /*COMMON_DRUDE_KERNELS_H_*/
......@@ -2,14 +2,14 @@
* Perform the first step of Langevin integration.
*/
extern "C" __global__ void integrateDrudeLangevinPart1(mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta,
const int* __restrict__ normalParticles, const int2* __restrict__ pairParticles, const mixed2* __restrict__ dt, mixed vscale, mixed fscale,
mixed noisescale, mixed vscaleDrude, mixed fscaleDrude, mixed noisescaleDrude, const float4* __restrict__ random, unsigned int randomIndex) {
KERNEL void integrateDrudeLangevinPart1(GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL mixed4* RESTRICT posDelta,
GLOBAL const int* RESTRICT normalParticles, GLOBAL const int2* RESTRICT pairParticles, GLOBAL const mixed2* RESTRICT dt, mixed vscale, mixed fscale,
mixed noisescale, mixed vscaleDrude, mixed fscaleDrude, mixed noisescaleDrude, GLOBAL const float4* RESTRICT random, unsigned int randomIndex) {
mixed stepSize = dt[0].y;
// Update normal particles.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_NORMAL_PARTICLES; i += blockDim.x*gridDim.x) {
for (int i = GLOBAL_ID; i < NUM_NORMAL_PARTICLES; i += GLOBAL_SIZE) {
int index = normalParticles[i];
mixed4 velocity = velm[index];
if (velocity.w != 0) {
......@@ -26,7 +26,7 @@ extern "C" __global__ void integrateDrudeLangevinPart1(mixed4* __restrict__ velm
// Update Drude particle pairs.
randomIndex += NUM_NORMAL_PARTICLES;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_PAIRS; i += blockDim.x*gridDim.x) {
for (int i = GLOBAL_ID; i < NUM_PAIRS; i += GLOBAL_SIZE) {
int2 particles = pairParticles[i];
mixed4 velocity1 = velm[particles.x];
mixed4 velocity2 = velm[particles.y];
......@@ -69,14 +69,17 @@ extern "C" __global__ void integrateDrudeLangevinPart1(mixed4* __restrict__ velm
* Perform the second step of Langevin integration.
*/
extern "C" __global__ void integrateDrudeLangevinPart2(real4* __restrict__ posq, real4* __restrict__ posqCorrection, const mixed4* __restrict__ posDelta, mixed4* __restrict__ velm, const mixed2* __restrict__ dt) {
KERNEL void integrateDrudeLangevinPart2(GLOBAL real4* RESTRICT posq, GLOBAL real4* RESTRICT posqCorrection, GLOBAL const mixed4* RESTRICT posDelta, GLOBAL mixed4* RESTRICT velm, GLOBAL const mixed2* RESTRICT dt) {
#ifdef SUPPORTS_DOUBLE_PRECISION
double invStepSize = 1.0/dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
#else
float invStepSize = 1.0f/dt[0].y;
#endif
int index = GLOBAL_ID;
while (index < NUM_ATOMS) {
mixed4 vel = velm[index];
if (vel.w != 0) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
......@@ -98,17 +101,17 @@ extern "C" __global__ void integrateDrudeLangevinPart2(real4* __restrict__ posq,
#endif
velm[index] = vel;
}
index += blockDim.x*gridDim.x;
index += GLOBAL_SIZE;
}
}
/**
* Apply hard wall constraints
*/
extern "C" __global__ void applyHardWallConstraints(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ velm,
const int2* __restrict__ pairParticles, const mixed2* __restrict__ dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
KERNEL void applyHardWallConstraints(GLOBAL real4* RESTRICT posq, GLOBAL real4* RESTRICT posqCorrection, GLOBAL mixed4* RESTRICT velm,
GLOBAL const int2* RESTRICT pairParticles, GLOBAL const mixed2* RESTRICT dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
mixed stepSize = dt[0].y;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_PAIRS; i += blockDim.x*gridDim.x) {
for (int i = GLOBAL_ID; i < NUM_PAIRS; i += GLOBAL_SIZE) {
int2 particles = pairParticles[i];
#ifdef USE_MIXED_PRECISION
real4 posReal1 = posq[particles.x];
......
......@@ -12,7 +12,7 @@
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)
SET(OPENMM_SOURCE_SUBDIRS . ../common)
# Collect up information about the version of the OpenMM library we're building
......@@ -59,32 +59,25 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
SET(COMMON_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/../common/src/CommonDrudeKernelSources.cpp)
SET(SOURCE_FILES ${SOURCE_FILES} ${COMMON_KERNELS_CPP})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/../common/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/cuda/src)
# Set variables needed for encoding kernel sources into a C++ class
SET(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CUDA_SOURCE_CLASS CudaDrudeKernelSources)
SET(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/common/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/common/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/../common/src)
# Create the library
INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR} -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP} -D CUDA_KERNELS_H=${CUDA_KERNELS_H} -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/cuda/EncodeCUDAFiles.cmake
DEPENDS ${CUDA_KERNELS}
)
SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
SET_SOURCE_FILES_PROPERTIES(${COMMON_KERNELS_CPP} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
ADD_DEPENDENCIES(${SHARED_TARGET} DrudeCommonKernels)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME}CUDA)
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -27,7 +27,8 @@
#include <exception>
#include "CudaDrudeKernelFactory.h"
#include "CudaDrudeKernels.h"
#include "CommonDrudeKernels.h"
#include "CudaContext.h"
#include "openmm/internal/windowsExport.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
......@@ -63,10 +64,10 @@ extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories() {
KernelImpl* CudaDrudeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
CudaContext& cu = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData())->contexts[0];
if (name == CalcDrudeForceKernel::Name())
return new CudaCalcDrudeForceKernel(name, platform, cu);
return new CommonCalcDrudeForceKernel(name, platform, cu);
if (name == IntegrateDrudeLangevinStepKernel::Name())
return new CudaIntegrateDrudeLangevinStepKernel(name, platform, cu);
return new CommonIntegrateDrudeLangevinStepKernel(name, platform, cu);
if (name == IntegrateDrudeSCFStepKernel::Name())
return new CudaIntegrateDrudeSCFStepKernel(name, platform, cu);
return new CommonIntegrateDrudeSCFStepKernel(name, platform, cu);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaDrudeKernels.h"
#include "CudaDrudeKernelSources.h"
#include "openmm/internal/ContextImpl.h"
#include "CudaBondedUtilities.h"
#include "CudaForceInfo.h"
#include "CudaIntegrationUtilities.h"
#include "CudaKernelSources.h"
#include "SimTKOpenMMRealType.h"
#include <set>
using namespace OpenMM;
using namespace std;
/**
 * Describes the particle groups defined by a DrudeForce.  Groups with index below
 * force.getNumParticles() correspond to the Drude particle entries; the groups after
 * them correspond to the screened pairs.
 */
class CudaDrudeForceInfo : public CudaForceInfo {
public:
    CudaDrudeForceInfo(const DrudeForce& force) : force(force) {
    }
    /**
     * Get the total number of groups: one per Drude particle entry plus one per screened pair.
     */
    int getNumParticleGroups() {
        return force.getNumParticles()+force.getNumScreenedPairs();
    }
    /**
     * Get the particles belonging to a group.
     *
     * @param index      the index of the group
     * @param particles  on exit, the indices of the particles in the group
     */
    void getParticlesInGroup(int index, vector<int>& particles) {
        particles.clear();
        if (index < force.getNumParticles()) {
            // A Drude particle entry: the first two particle indices are always present,
            // the remaining three are included only when they are set (-1 means unused).
            int p, p1, p2, p3, p4;
            double charge, polarizability, aniso12, aniso34;
            force.getParticleParameters(index, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
            particles.push_back(p);
            particles.push_back(p1);
            if (p2 != -1)
                particles.push_back(p2);
            if (p3 != -1)
                particles.push_back(p3);
            if (p4 != -1)
                particles.push_back(p4);
        }
        else {
            // A screened pair: the first two particle indices of each of the two entries it refers to.
            int drude1, drude2;
            double thole;
            force.getScreenedPairParameters(index-force.getNumParticles(), drude1, drude2, thole);
            int p, p1, p2, p3, p4;
            double charge, polarizability, aniso12, aniso34;
            force.getParticleParameters(drude1, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
            particles.push_back(p);
            particles.push_back(p1);
            force.getParticleParameters(drude2, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
            particles.push_back(p);
            particles.push_back(p1);
        }
    }
    /**
     * Decide whether two groups have identical parameters.
     *
     * @param group1  the index of the first group
     * @param group2  the index of the second group
     * @return true if both groups are of the same kind and all their parameters match
     */
    bool areGroupsIdentical(int group1, int group2) {
        if (group1 < force.getNumParticles() && group2 < force.getNumParticles()) {
            int p, p1, p2, p3, p4;
            double charge1, polarizability1, aniso12_1, aniso34_1;
            double charge2, polarizability2, aniso12_2, aniso34_2;
            force.getParticleParameters(group1, p, p1, p2, p3, p4, charge1, polarizability1, aniso12_1, aniso34_1);
            force.getParticleParameters(group2, p, p1, p2, p3, p4, charge2, polarizability2, aniso12_2, aniso34_2);
            return (charge1 == charge2 && polarizability1 == polarizability2 && aniso12_1 == aniso12_2 && aniso34_1 == aniso34_2);
        }
        if (group1 >= force.getNumParticles() && group2 >= force.getNumParticles()) {
            int drude1, drude2;
            double thole1, thole2;
            force.getScreenedPairParameters(group1-force.getNumParticles(), drude1, drude2, thole1);
            // The second lookup must read group2; reading group1 twice made every
            // pair of screened-pair groups compare as identical.
            force.getScreenedPairParameters(group2-force.getNumParticles(), drude1, drude2, thole2);
            return (thole1 == thole2);
        }
        // One group is a particle entry and the other is a screened pair.
        return false;
    }
private:
    const DrudeForce& force;
};
void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
cu.setAsCurrent();
if (cu.getContextIndex() != 0)
return; // This is run entirely on one device
int numParticles = force.getNumParticles();
if (numParticles > 0) {
// Create the harmonic interaction .
vector<vector<int> > atoms(numParticles, vector<int>(5));
particleParams.initialize<float4>(cu, numParticles, "drudeParticleParams");
vector<float4> paramVector(numParticles);
for (int i = 0; i < numParticles; i++) {
double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], charge, polarizability, aniso12, aniso34);
double a1 = (atoms[i][2] == -1 ? 1 : aniso12);
double a2 = (atoms[i][3] == -1 || atoms[i][4] == -1 ? 1 : aniso34);
double a3 = 3-a1-a2;
double k3 = ONE_4PI_EPS0*charge*charge/(polarizability*a3);
double k1 = ONE_4PI_EPS0*charge*charge/(polarizability*a1) - k3;
double k2 = ONE_4PI_EPS0*charge*charge/(polarizability*a2) - k3;
if (atoms[i][2] == -1) {
atoms[i][2] = 0;
k1 = 0;
}
if (atoms[i][3] == -1 || atoms[i][4] == -1) {
atoms[i][3] = 0;
atoms[i][4] = 0;
k2 = 0;
}
paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
}
particleParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
}
int numPairs = force.getNumScreenedPairs();
if (numPairs > 0) {
// Create the screened interaction between dipole pairs.
vector<vector<int> > atoms(numPairs, vector<int>(4));
pairParams.initialize<float2>(cu, numPairs, "drudePairParams");
vector<float2> paramVector(numPairs);
for (int i = 0; i < numPairs; i++) {
int drude1, drude2;
double thole;
force.getScreenedPairParameters(i, drude1, drude2, thole);
int p2, p3, p4;
double charge1, charge2, polarizability1, polarizability2, aniso12, aniso34;
force.getParticleParameters(drude1, atoms[i][0], atoms[i][1], p2, p3, p4, charge1, polarizability1, aniso12, aniso34);
force.getParticleParameters(drude2, atoms[i][2], atoms[i][3], p2, p3, p4, charge2, polarizability2, aniso12, aniso34);
double screeningScale = thole/pow(polarizability1*polarizability2, 1.0/6.0);
double energyScale = ONE_4PI_EPS0*charge1*charge2;
paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
}
pairParams.upload(paramVector);
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
}
cu.addForce(new CudaDrudeForceInfo(force));
}
/**
 * Execute the kernel.  No work is done here and the returned energy is always 0.0;
 * initialize() registers the Drude interactions with cu.getBondedUtilities() instead.
 *
 * @param context        the context in which to execute this kernel (unused)
 * @param includeForces  true if forces should be calculated (unused)
 * @param includeEnergy  true if the energy should be calculated (unused)
 * @return always 0.0
 */
double CudaCalcDrudeForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
/**
 * Recompute the per-particle and per-pair parameters from a DrudeForce and upload them
 * into the existing device arrays.
 *
 * @param context  the context to copy parameters to
 * @param force    the DrudeForce to copy the parameters from
 * @throws OpenMMException if the number of Drude particles or screened pairs no longer
 *         matches the size of the arrays created by initialize()
 */
void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, const DrudeForce& force) {
    if (cu.getContextIndex() != 0)
        return; // This is run entirely on one device

    // Refresh the per-particle force constants.
    const int particleCount = force.getNumParticles();
    if (particleCount > 0) {
        if (!particleParams.isInitialized() || particleCount != particleParams.getSize())
            throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
        vector<float4> hostParams(particleCount);
        for (int i = 0; i < particleCount; i++) {
            int p, p1, p2, p3, p4;
            double charge, polarizability, aniso12, aniso34;
            force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);

            // An axis is only anisotropic when the particles defining it are set (-1 means unset).
            const bool hasAxis12 = (p2 != -1);
            const bool hasAxis34 = (p3 != -1 && p4 != -1);
            const double a1 = (hasAxis12 ? aniso12 : 1);
            const double a2 = (hasAxis34 ? aniso34 : 1);
            const double a3 = 3-a1-a2;
            const double chargeScale = ONE_4PI_EPS0*charge*charge;
            const double k3 = chargeScale/(polarizability*a3);
            const double k1 = (hasAxis12 ? chargeScale/(polarizability*a1) - k3 : 0);
            const double k2 = (hasAxis34 ? chargeScale/(polarizability*a2) - k3 : 0);
            hostParams[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
        }
        particleParams.upload(hostParams);
    }

    // Refresh the screened pair scale factors.
    const int pairCount = force.getNumScreenedPairs();
    if (pairCount > 0) {
        if (!pairParams.isInitialized() || pairCount != pairParams.getSize())
            throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
        vector<float2> hostParams(pairCount);
        for (int i = 0; i < pairCount; i++) {
            int firstEntry, secondEntry;
            double thole;
            force.getScreenedPairParameters(i, firstEntry, secondEntry, thole);
            int p, p1, p2, p3, p4;
            double chargeA, chargeB, polarizabilityA, polarizabilityB, aniso12, aniso34;
            force.getParticleParameters(firstEntry, p, p1, p2, p3, p4, chargeA, polarizabilityA, aniso12, aniso34);
            force.getParticleParameters(secondEntry, p, p1, p2, p3, p4, chargeB, polarizabilityB, aniso12, aniso34);
            const double screeningScale = thole/pow(polarizabilityA*polarizabilityB, 1.0/6.0);
            const double energyScale = ONE_4PI_EPS0*chargeA*chargeB;
            hostParams[i] = make_float2((float) screeningScale, (float) energyScale);
        }
        pairParams.upload(hostParams);
    }
}
/**
 * Initialize the kernel: split the system's particles into ordinary particles and
 * Drude pairs, upload both lists to the device, and compile the integration kernels.
 *
 * @param system      the System this kernel will be applied to
 * @param integrator  the DrudeLangevinIntegrator this kernel will be used for
 * @param force       the DrudeForce to get particle parameters from
 */
void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
    cu.getPlatformData().initializeContexts(system);
    // Make this context current before creating device arrays and modules, as the
    // SCF kernel's initialize() and both execute() methods already do.
    cu.setAsCurrent();
    cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());

    // Identify particle pairs and ordinary particles.  Start from the full set of
    // particle indices and remove the two particles of every DrudeForce entry.
    set<int> particles;
    vector<int> normalParticleVec;
    vector<int2> pairParticleVec;
    for (int i = 0; i < system.getNumParticles(); i++)
        particles.insert(i);
    for (int i = 0; i < force.getNumParticles(); i++) {
        int p, p1, p2, p3, p4;
        double charge, polarizability, aniso12, aniso34;
        force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
        particles.erase(p);
        particles.erase(p1);
        pairParticleVec.push_back(make_int2(p, p1));
    }
    normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());

    // The arrays are created with at least one element even when a list is empty;
    // an empty list is simply never uploaded.
    normalParticles.initialize<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
    pairParticles.initialize<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
    if (normalParticleVec.size() > 0)
        normalParticles.upload(normalParticleVec);
    if (pairParticleVec.size() > 0)
        pairParticles.upload(pairParticleVec);

    // Create kernels.  The true list sizes (not the padded array sizes) are compiled in.
    map<string, string> defines;
    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
    defines["NUM_NORMAL_PARTICLES"] = cu.intToString(normalParticleVec.size());
    defines["NUM_PAIRS"] = cu.intToString(pairParticleVec.size());
    CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaDrudeKernelSources::drudeLangevin, defines, "");
    kernel1 = cu.getKernel(module, "integrateDrudeLangevinPart1");
    kernel2 = cu.getKernel(module, "integrateDrudeLangevinPart2");
    hardwallKernel = cu.getKernel(module, "applyHardWallConstraints");

    // A negative value forces execute() to upload the step size on its first call.
    prevStepSize = -1.0;
}
/**
 * Take one Drude Langevin time step: launch the first integration kernel, apply
 * constraints, launch the second integration kernel, optionally apply the hard wall
 * constraint, then update virtual sites, time and step count.
 *
 * @param context     the context in which to execute this kernel
 * @param integrator  the DrudeLangevinIntegrator this kernel is being used for
 */
void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();

    // Scalars are passed to the kernels as double in double/mixed precision, otherwise as float.
    const bool useDouble = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());

    // Integrator coefficients for the ordinary particles...
    double stepSize = integrator.getStepSize();
    const double friction = integrator.getFriction();
    double vscale = exp(-stepSize*friction);
    double fscale = (1-vscale)/friction/(double) 0x100000000;
    double noisescale = sqrt(2*BOLTZ*integrator.getTemperature()*friction)*sqrt(0.5*(1-vscale*vscale)/friction);

    // ...and for the Drude pairs.
    const double drudeFriction = integrator.getDrudeFriction();
    double vscaleDrude = exp(-stepSize*drudeFriction);
    double fscaleDrude = (1-vscaleDrude)/drudeFriction/(double) 0x100000000;
    double noisescaleDrude = sqrt(2*BOLTZ*integrator.getDrudeTemperature()*drudeFriction)*sqrt(0.5*(1-vscaleDrude*vscaleDrude)/drudeFriction);
    double maxDrudeDistance = integrator.getMaxDrudeDistance();
    double hardwallscaleDrude = sqrt(BOLTZ*integrator.getDrudeTemperature());

    // Upload the step size only when it differs from the one already on the device.
    if (stepSize != prevStepSize) {
        if (useDouble) {
            double2 ss = make_double2(0, stepSize);
            integration.getStepSize().upload(&ss);
        }
        else {
            float2 ss = make_float2(0, (float) stepSize);
            integration.getStepSize().upload(&ss);
        }
        prevStepSize = stepSize;
    }

    // Single precision copies of the scalars, and a pointer to whichever copy matches the precision mode.
    float vscaleFloat = (float) vscale;
    float fscaleFloat = (float) fscale;
    float noisescaleFloat = (float) noisescale;
    float vscaleDrudeFloat = (float) vscaleDrude;
    float fscaleDrudeFloat = (float) fscaleDrude;
    float noisescaleDrudeFloat = (float) noisescaleDrude;
    float maxDrudeDistanceFloat = (float) maxDrudeDistance;
    float hardwallscaleDrudeFloat = (float) hardwallscaleDrude;
    void* vscalePtr = (useDouble ? (void*) &vscale : (void*) &vscaleFloat);
    void* fscalePtr = (useDouble ? (void*) &fscale : (void*) &fscaleFloat);
    void* noisescalePtr = (useDouble ? (void*) &noisescale : (void*) &noisescaleFloat);
    void* vscaleDrudePtr = (useDouble ? (void*) &vscaleDrude : (void*) &vscaleDrudeFloat);
    void* fscaleDrudePtr = (useDouble ? (void*) &fscaleDrude : (void*) &fscaleDrudeFloat);
    void* noisescaleDrudePtr = (useDouble ? (void*) &noisescaleDrude : (void*) &noisescaleDrudeFloat);
    void* maxDrudeDistancePtr = (useDouble ? (void*) &maxDrudeDistance : (void*) &maxDrudeDistanceFloat);
    void* hardwallscaleDrudePtr = (useDouble ? (void*) &hardwallscaleDrude : (void*) &hardwallscaleDrudeFloat);

    // First integration kernel.
    int randomIndex = integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize());
    void* part1Args[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
            &normalParticles.getDevicePointer(), &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(),
            vscalePtr, fscalePtr, noisescalePtr, vscaleDrudePtr, fscaleDrudePtr, noisescaleDrudePtr, &integration.getRandom().getDevicePointer(), &randomIndex};
    cu.executeKernel(kernel1, part1Args, numAtoms);

    // Constraints.
    integration.applyConstraints(integrator.getConstraintTolerance());

    // Second integration kernel.  The correction array is only passed in mixed precision.
    CUdeviceptr posCorrection = 0;
    if (cu.getUseMixedPrecision())
        posCorrection = cu.getPosqCorrection().getDevicePointer();
    void* part2Args[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
            &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
    cu.executeKernel(kernel2, part2Args, numAtoms);

    // Hard wall constraint, only when a positive maximum distance is set.
    if (maxDrudeDistance > 0) {
        void* wallArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(),
                &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
        cu.executeKernel(hardwallKernel, wallArgs, pairParticles.getSize());
    }
    integration.computeVirtualSites();

    // Advance the time and step count.
    cu.setTime(cu.getTime()+stepSize);
    cu.setStepCount(cu.getStepCount()+1);
    cu.reorderAtoms();
}
/**
 * Compute the kinetic energy by delegating to the integration utilities, passing
 * half of the integrator's step size.
 *
 * @param context     the context in which to execute this kernel
 * @param integrator  the DrudeLangevinIntegrator this kernel is being used for
 * @return the value returned by the integration utilities
 */
double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
    const double halfStep = 0.5*integrator.getStepSize();
    return cu.getIntegrationUtilities().computeKineticEnergy(halfStep);
}
/**
 * Release the minimizer's position buffer, if one was allocated.
 */
CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
    if (minimizerPos == NULL)
        return; // Nothing was allocated.
    lbfgs_free(minimizerPos);
}
void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
cu.getPlatformData().initializeContexts(system);
cu.setAsCurrent();
// Identify Drude particles.
for (int i = 0; i < force.getNumParticles(); i++) {
int p, p1, p2, p3, p4;
double charge, polarizability, aniso12, aniso34;
force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
drudeParticles.push_back(p);
}
// Initialize the energy minimizer.
minimizerPos = lbfgs_malloc(drudeParticles.size()*3);
if (minimizerPos == NULL)
throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
lbfgs_parameter_init(&minimizerParams);
minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
// Create the kernels.
map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
kernel1 = cu.getKernel(module, "integrateVerletPart1");
kernel2 = cu.getKernel(module, "integrateVerletPart2");
prevStepSize = -1.0;
}
/**
 * Take one time step: run the two Verlet kernels with constraints applied between
 * them, update virtual sites, run the minimizer, then advance time and step count.
 *
 * @param context     the context in which to execute this kernel
 * @param integrator  the DrudeSCFIntegrator this kernel is being used for
 */
void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();
    int paddedNumAtoms = cu.getPaddedNumAtoms();
    double dt = integrator.getStepSize();

    // Upload the step size only when it differs from the one already on the device.
    if (dt != prevStepSize) {
        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
            vector<double2> stepSizeVec(1, make_double2(dt, dt));
            integration.getStepSize().upload(stepSizeVec);
        }
        else {
            vector<float2> stepSizeVec(1, make_float2((float) dt, (float) dt));
            integration.getStepSize().upload(stepSizeVec);
        }
        prevStepSize = dt;
    }

    // First integration kernel.  The correction array is only passed in mixed precision.
    CUdeviceptr posCorrection = 0;
    if (cu.getUseMixedPrecision())
        posCorrection = cu.getPosqCorrection().getDevicePointer();
    void* part1Args[] = {&numAtoms, &paddedNumAtoms, &integration.getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
            &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
    cu.executeKernel(kernel1, part1Args, numAtoms);

    // Constraints.
    integration.applyConstraints(integrator.getConstraintTolerance());

    // Second integration kernel.
    void* part2Args[] = {&numAtoms, &integration.getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
            &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
    cu.executeKernel(kernel2, part2Args, numAtoms);

    // Update virtual sites, then run the minimizer on the Drude particles.
    integration.computeVirtualSites();
    minimize(context, integrator.getMinimizationErrorTolerance());

    // Advance the time and step count.
    cu.setTime(cu.getTime()+dt);
    cu.setStepCount(cu.getStepCount()+1);
    cu.reorderAtoms();
}
/**
 * Compute the kinetic energy by delegating to the integration utilities, passing
 * half of the integrator's step size.
 *
 * @param context     the context in which to execute this kernel
 * @param integrator  the DrudeSCFIntegrator this kernel is being used for
 * @return the value returned by the integration utilities
 */
double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
    const double halfStep = 0.5*integrator.getStepSize();
    return cu.getIntegrationUtilities().computeKineticEnergy(halfStep);
}
/**
 * Bundles the references that the L-BFGS callback needs.  A pointer to one of these
 * is handed to lbfgs() as its opaque instance argument.
 */
struct MinimizerData {
    ContextImpl& context;
    CudaContext& cu;
    vector<int>& drudeParticles;

    MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& drudeParticles)
        : context(context), cu(cu), drudeParticles(drudeParticles) {
    }
};
/**
 * L-BFGS objective callback.  Writes the trial coordinates into the positions of the
 * Drude particles, recomputes forces and energy, and reports the energy and gradient.
 *
 * @param instance  pointer to the MinimizerData passed to lbfgs()
 * @param x         trial coordinates, three values per Drude particle
 * @param g         on exit, the gradient for each coordinate
 * @param n         number of coordinates (unused)
 * @param step      current line search step (unused)
 * @return the energy returned by context.calcForcesAndEnergy()
 */
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
    MinimizerData* data = static_cast<MinimizerData*>(instance);
    ContextImpl& context = data->context;
    CudaContext& cu = data->cu;
    vector<int>& drudeParticles = data->drudeParticles;
    int numDrudeParticles = drudeParticles.size();

    // Download the positions, overwrite the Drude particle coordinates, and upload them again.
    void* hostBuffer = cu.getPinnedBuffer();
    cu.getPosq().download(hostBuffer);
    if (cu.getUseDoublePrecision()) {
        double4* positions = (double4*) hostBuffer;
        for (int i = 0; i < numDrudeParticles; ++i) {
            double4& pos = positions[drudeParticles[i]];
            pos.x = x[3*i];
            pos.y = x[3*i+1];
            pos.z = x[3*i+2];
        }
    }
    else {
        float4* positions = (float4*) hostBuffer;
        for (int i = 0; i < numDrudeParticles; ++i) {
            float4& pos = positions[drudeParticles[i]];
            pos.x = x[3*i];
            pos.y = x[3*i+1];
            pos.z = x[3*i+2];
        }
    }
    cu.getPosq().upload(hostBuffer);

    // Compute the forces and energy for this configuration.
    double energy = context.calcForcesAndEnergy(true, true);

    // Forces are read back as 64 bit integers, with the x, y and z components stored
    // in consecutive blocks of paddedNumAtoms values.  The gradient is the negated,
    // rescaled force.
    long long* forces = (long long*) cu.getPinnedBuffer();
    cu.getForce().download(forces);
    const double forceScale = -1.0/0x100000000;
    const int stride = cu.getPaddedNumAtoms();
    for (int i = 0; i < numDrudeParticles; ++i) {
        const int atom = drudeParticles[i];
        g[3*i] = forceScale*forces[atom];
        g[3*i+1] = forceScale*forces[atom+stride];
        g[3*i+2] = forceScale*forces[atom+stride*2];
    }
    return energy;
}
/**
 * Run the L-BFGS minimizer over the coordinates of the Drude particles, starting
 * from their current positions.
 *
 * @param context    the context whose forces and energy are evaluated
 * @param tolerance  the error tolerance, rescaled below before it is used as the
 *                   minimizer's epsilon
 */
void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
    // Copy the current Drude particle positions into the minimizer's buffer.
    int numDrudeParticles = drudeParticles.size();
    void* hostBuffer = cu.getPinnedBuffer();
    cu.getPosq().download(hostBuffer);
    if (cu.getUseDoublePrecision()) {
        double4* positions = (double4*) hostBuffer;
        for (int i = 0; i < numDrudeParticles; ++i) {
            double4 pos = positions[drudeParticles[i]];
            minimizerPos[3*i] = pos.x;
            minimizerPos[3*i+1] = pos.y;
            minimizerPos[3*i+2] = pos.z;
        }
    }
    else {
        float4* positions = (float4*) hostBuffer;
        for (int i = 0; i < numDrudeParticles; ++i) {
            float4 pos = positions[drudeParticles[i]];
            minimizerPos[3*i] = pos.x;
            minimizerPos[3*i+1] = pos.y;
            minimizerPos[3*i+2] = pos.z;
        }
        // xtol is only overridden when positions are not stored in double precision.
        minimizerParams.xtol = 1e-7;
    }

    // Scale the tolerance by the root mean square position, but never by less than 1.
    double norm = 0.0;
    const int numCoordinates = 3*numDrudeParticles;
    for (int i = 0; i < numCoordinates; i++)
        norm += minimizerPos[i]*minimizerPos[i];
    norm /= numDrudeParticles;
    if (norm < 1)
        norm = 1;
    else
        norm = sqrt(norm);
    minimizerParams.epsilon = tolerance/norm;

    // Run the minimizer.  Its status code and final function value are not used.
    lbfgsfloatval_t fx;
    MinimizerData data(context, cu, drudeParticles);
    lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
}
\ No newline at end of file
#ifndef CUDA_DRUDE_KERNELS_H_
#define CUDA_DRUDE_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/DrudeKernels.h"
#include "CudaContext.h"
#include "CudaArray.h"
#include "lbfgs.h"
namespace OpenMM {
/**
 * CUDA implementation of the kernel that DrudeForce invokes to calculate the forces
 * acting on the system and the energy of the system.
 */
class CudaCalcDrudeForceKernel : public CalcDrudeForceKernel {
public:
    /**
     * Create the kernel.
     *
     * @param name      the name of the kernel
     * @param platform  the Platform the kernel belongs to
     * @param cu        the CudaContext the kernel operates on; only a reference is kept
     */
    CudaCalcDrudeForceKernel(const std::string& name, const Platform& platform, CudaContext& cu) :
            CalcDrudeForceKernel(name, platform), cu(cu) {
    }
    /**
     * Prepare the kernel for use.
     *
     * @param system  the System this kernel will be applied to
     * @param force   the DrudeForce this kernel will be used for
     */
    void initialize(const System& system, const DrudeForce& force);
    /**
     * Run the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push modified force parameters to a context.
     *
     * @param context  the context to copy parameters to
     * @param force    the DrudeForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private:
    CudaContext& cu;
    // Device arrays holding the per-particle and per-screened-pair parameters.
    CudaArray particleParams;
    CudaArray pairParams;
};
/**
 * CUDA implementation of the kernel that DrudeLangevinIntegrator invokes to take one time step.
 */
class CudaIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public:
    /**
     * Create the kernel.
     *
     * @param name      the name of the kernel
     * @param platform  the Platform the kernel belongs to
     * @param cu        the CudaContext the kernel operates on; only a reference is kept
     */
    CudaIntegrateDrudeLangevinStepKernel(const std::string& name, const Platform& platform, CudaContext& cu) :
            IntegrateDrudeLangevinStepKernel(name, platform), cu(cu) {
    }
    /**
     * Prepare the kernel for use.
     *
     * @param system      the System this kernel will be applied to
     * @param integrator  the DrudeLangevinIntegrator this kernel will be used for
     * @param force       the DrudeForce to get particle parameters from
     */
    void initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force);
    /**
     * Take one time step.
     *
     * @param context     the context in which to execute this kernel
     * @param integrator  the DrudeLangevinIntegrator this kernel is being used for
     */
    void execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
    /**
     * Compute the kinetic energy.
     *
     * @param context     the context in which to execute this kernel
     * @param integrator  the DrudeLangevinIntegrator this kernel is being used for
     * @return the kinetic energy
     */
    double computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
private:
    CudaContext& cu;
    // The step size most recently uploaded to the device.
    double prevStepSize;
    // Device arrays listing the ordinary particles and the Drude particle pairs.
    CudaArray normalParticles;
    CudaArray pairParticles;
    CUfunction kernel1;
    CUfunction kernel2;
    CUfunction hardwallKernel;
};
/**
 * CUDA implementation of the kernel that DrudeSCFIntegrator invokes to take one time step.
 */
class CudaIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
public:
    /**
     * Create the kernel.  The minimizer's position buffer starts out unallocated (NULL).
     *
     * @param name      the name of the kernel
     * @param platform  the Platform the kernel belongs to
     * @param cu        the CudaContext the kernel operates on; only a reference is kept
     */
    CudaIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, CudaContext& cu) :
            IntegrateDrudeSCFStepKernel(name, platform), cu(cu), minimizerPos(NULL) {
    }
    /**
     * Release the minimizer's position buffer, if one was allocated.
     */
    ~CudaIntegrateDrudeSCFStepKernel();
    /**
     * Prepare the kernel for use.
     *
     * @param system      the System this kernel will be applied to
     * @param integrator  the DrudeSCFIntegrator this kernel will be used for
     * @param force       the DrudeForce to get particle parameters from
     */
    void initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force);
    /**
     * Take one time step.
     *
     * @param context     the context in which to execute this kernel
     * @param integrator  the DrudeSCFIntegrator this kernel is being used for
     */
    void execute(ContextImpl& context, const DrudeSCFIntegrator& integrator);
    /**
     * Compute the kinetic energy.
     *
     * @param context     the context in which to execute this kernel
     * @param integrator  the DrudeSCFIntegrator this kernel is being used for
     * @return the kinetic energy
     */
    double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
private:
    /**
     * Run the L-BFGS minimizer over the positions of the Drude particles.
     *
     * @param context    the context whose forces and energy are evaluated
     * @param tolerance  the error tolerance for the minimization
     */
    void minimize(ContextImpl& context, double tolerance);
    CudaContext& cu;
    // The step size most recently uploaded to the device.
    double prevStepSize;
    // Indices of the particles whose positions the minimizer adjusts.
    std::vector<int> drudeParticles;
    // Coordinate buffer owned by this object; allocated with lbfgs_malloc, freed with lbfgs_free.
    lbfgsfloatval_t *minimizerPos;
    lbfgs_parameter_t minimizerParams;
    CUfunction kernel1;
    CUfunction kernel2;
};
} // namespace OpenMM
#endif /*CUDA_DRUDE_KERNELS_H_*/
......@@ -5,6 +5,7 @@
# Turn on test support for this directory.
ENABLE_TESTING()
# Make the CUDA headers visible to the test programs.
INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIR})
# Add the Drude plugin's tests directory to the include path
# (presumably for test headers shared between platforms -- confirm).
INCLUDE_DIRECTORIES(${OPENMM_DIR}/plugins/drude/tests)
# Automatically create tests using files named "Test*.cpp".
# Note that the glob below matches any .cpp file whose name contains "Test".
FILE(GLOB TEST_PROGS "*Test*.cpp")
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
//#include "ReferenceTests.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/DrudeForce.h"
#include "CudaPlatform.h"
#include "SimTKOpenMMUtilities.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories();
/**
 * Hook for platform-specific tests. This test program defines none, so the
 * body is intentionally empty.
 */
void runPlatformTests() {
}
#include "TestDrudeNoseHoover.h"
/**
 * Register the Drude CUDA kernel factories and return the CUDA platform.
 *
 * @param argc  number of command-line arguments
 * @param argv  command-line arguments; when argc > 1, argv[1] becomes the
 *              default value of the platform's "Precision" property
 * @return the Platform registered under the name "CUDA"
 */
Platform& initializePlatform(int argc, char* argv[]) {
    registerDrudeCudaKernelFactories();
    Platform& platform = Platform::getPlatformByName("CUDA");
    if (argc > 1) {
        platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
    }
    return platform;
}
......@@ -12,7 +12,7 @@
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
# Both this directory (.) and the sibling ../common directory are listed.
SET(OPENMM_SOURCE_SUBDIRS . ../common)
# Collect up information about the version of the OpenMM library we're building
......@@ -59,32 +59,25 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
# NOTE(review): this section both pulls in the generated common kernel sources
# and still generates an OpenCL-specific kernel source class. It looks like
# old and new build steps are interleaved -- confirm which are still needed.

# Generated C++ file with the common Drude kernel sources. It lives in the
# sibling ../common build directory and is compiled into this library.
SET(COMMON_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/../common/src/CommonDrudeKernelSources.cpp)
SET(SOURCE_FILES ${SOURCE_FILES} ${COMMON_KERNELS_CPP})
# Include paths are added with BEFORE, so later lines take precedence.
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/../common/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/opencl/src)
# Set variables needed for encoding kernel sources into a C++ class
SET(CL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CL_SOURCE_CLASS OpenCLDrudeKernelSources)
SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.cpp)
SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CL_KERNELS_CPP} ${CL_KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/common/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/common/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/../common/src)
# Create the library
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
# Regenerate the kernel source class whenever any .cl file changes, by running
# EncodeCLFiles.cmake in script mode with the variables set above.
FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/opencl/EncodeCLFiles.cmake
DEPENDS ${OPENCL_KERNELS}
)
# Mark the generated files so CMake does not require them at configure time.
SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)
SET_SOURCE_FILES_PROPERTIES(${COMMON_KERNELS_CPP} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
# Build the DrudeCommonKernels target first (presumably the target that
# produces COMMON_KERNELS_CPP -- confirm).
ADD_DEPENDENCIES(${SHARED_TARGET} DrudeCommonKernels)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME}OpenCL)
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -27,7 +27,8 @@
#include <exception>
#include "OpenCLDrudeKernelFactory.h"
#include "OpenCLDrudeKernels.h"
#include "CommonDrudeKernels.h"
#include "OpenCLContext.h"
#include "openmm/internal/windowsExport.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
......@@ -63,10 +64,10 @@ extern "C" OPENMM_EXPORT void registerDrudeOpenCLKernelFactories() {
/**
 * Create the kernel implementation that corresponds to a kernel name.
 *
 * The kernels are the platform-independent Common* implementations, bound to
 * the first OpenCLContext of the context's platform data.
 *
 * @param name      the name of the kernel to create
 * @param platform  the Platform the kernel is created for
 * @param context   the context whose platform data supplies the OpenCLContext
 * @return a newly allocated kernel (allocated with new and returned as a raw pointer)
 * @throws OpenMMException if name is not one of the Drude kernel names
 */
KernelImpl* OpenCLDrudeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
    OpenCLContext& cl = *static_cast<OpenCLPlatform::PlatformData*>(context.getPlatformData())->contexts[0];
    // Exactly one guarded return per kernel name, so an unrecognized name
    // falls through to the exception below.
    if (name == CalcDrudeForceKernel::Name())
        return new CommonCalcDrudeForceKernel(name, platform, cl);
    if (name == IntegrateDrudeLangevinStepKernel::Name())
        return new CommonIntegrateDrudeLangevinStepKernel(name, platform, cl);
    if (name == IntegrateDrudeSCFStepKernel::Name())
        return new CommonIntegrateDrudeSCFStepKernel(name, platform, cl);
    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
/**
* Perform the first step of Langevin integration.
*
* Updates the velocities stored in velm and writes each particle's
* displacement for this step (stepSize * new velocity) to posDelta.
* Particles listed in normalParticles are updated individually. Each pair in
* pairParticles is updated through its center-of-mass velocity and its
* relative velocity, which use separate scale factors.
*
* @param velm             per-particle velocity in xyz; w is used as the inverse mass
* @param force            per-particle force
* @param posDelta         output: displacement for this step (w is written as 0)
* @param normalParticles  indices of the particles updated individually
* @param pairParticles    index pairs (x, y) of the particles updated together
* @param dt               dt[0].y is read as the step size
* @param vscale           velocity scale for normal particles and pair centers of mass
* @param fscale           force scale for normal particles and pair centers of mass
* @param noisescale       random-term scale for normal particles and pair centers of mass
* @param vscaleDrude      velocity scale for the relative motion within a pair
* @param fscaleDrude      force scale for the relative motion within a pair
* @param noisescaleDrude  random-term scale for the relative motion within a pair
* @param random           random values; only the xyz components are used
* @param randomIndex      offset of the first value to use in random
*/
__kernel void integrateDrudeLangevinPart1(__global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta,
__global const int* restrict normalParticles, __global const int2* restrict pairParticles, __global const mixed2* restrict dt, mixed vscale, mixed fscale,
mixed noisescale, mixed vscaleDrude, mixed fscaleDrude, mixed noisescaleDrude, __global const float4* restrict random, unsigned int randomIndex) {
mixed stepSize = dt[0].y;
// Update normal particles.
for (int i = get_global_id(0); i < NUM_NORMAL_PARTICLES; i += get_global_size(0)) {
int index = normalParticles[i];
mixed4 velocity = velm[index];
// Particles with zero inverse mass are skipped entirely; their posDelta
// entry is not written.
if (velocity.w != 0) {
mixed sqrtInvMass = sqrt(velocity.w);
// NOTE(review): the random buffer is indexed by particle index here, while
// the pair loop below indexes it by pair ordinal starting at
// NUM_NORMAL_PARTICLES -- confirm the two ranges cannot overlap.
float4 rand = random[randomIndex+index];
real4 f = force[index];
velocity.x = vscale*velocity.x + fscale*velocity.w*f.x + noisescale*sqrtInvMass*rand.x;
velocity.y = vscale*velocity.y + fscale*velocity.w*f.y + noisescale*sqrtInvMass*rand.y;
velocity.z = vscale*velocity.z + fscale*velocity.w*f.z + noisescale*sqrtInvMass*rand.z;
velm[index] = velocity;
posDelta[index] = (mixed4) (stepSize*velocity.x, stepSize*velocity.y, stepSize*velocity.z, 0);
}
}
// Update Drude particle pairs.
randomIndex += NUM_NORMAL_PARTICLES;
for (int i = get_global_id(0); i < NUM_PAIRS; i += get_global_size(0)) {
int2 particles = pairParticles[i];
mixed4 velocity1 = velm[particles.x];
mixed4 velocity2 = velm[particles.y];
// NOTE(review): unlike the loop above there is no check for w == 0, so a
// zero inverse mass would give an infinite mass here -- presumably both
// members of a pair always have nonzero mass; confirm.
mixed mass1 = 1/velocity1.w;
mixed mass2 = 1/velocity2.w;
mixed invTotalMass = 1/(mass1+mass2);
mixed invReducedMass = (mass1+mass2)*velocity1.w*velocity2.w;
mixed mass1fract = invTotalMass*mass1;
mixed mass2fract = invTotalMass*mass2;
mixed sqrtInvTotalMass = sqrt(invTotalMass);
mixed sqrtInvReducedMass = sqrt(invReducedMass);
// Split the pair's motion into center-of-mass and relative parts.
mixed4 cmVel = velocity1*mass1fract+velocity2*mass2fract;
mixed4 relVel = velocity2-velocity1;
mixed4 force1 = convert_mixed4(force[particles.x]);
mixed4 force2 = convert_mixed4(force[particles.y]);
mixed4 cmForce = force1+force2;
mixed4 relForce = force2*mass1fract - force1*mass2fract;
// Each pair consumes two consecutive random vectors: one for the center
// of mass and one for the relative motion.
float4 rand1 = random[randomIndex+2*i];
float4 rand2 = random[randomIndex+2*i+1];
cmVel.x = vscale*cmVel.x + fscale*invTotalMass*cmForce.x + noisescale*sqrtInvTotalMass*rand1.x;
cmVel.y = vscale*cmVel.y + fscale*invTotalMass*cmForce.y + noisescale*sqrtInvTotalMass*rand1.y;
cmVel.z = vscale*cmVel.z + fscale*invTotalMass*cmForce.z + noisescale*sqrtInvTotalMass*rand1.z;
relVel.x = vscaleDrude*relVel.x + fscaleDrude*invReducedMass*relForce.x + noisescaleDrude*sqrtInvReducedMass*rand2.x;
relVel.y = vscaleDrude*relVel.y + fscaleDrude*invReducedMass*relForce.y + noisescaleDrude*sqrtInvReducedMass*rand2.y;
relVel.z = vscaleDrude*relVel.z + fscaleDrude*invReducedMass*relForce.z + noisescaleDrude*sqrtInvReducedMass*rand2.z;
// Recombine into per-particle velocities; only xyz is replaced, so w is kept.
velocity1.xyz = cmVel.xyz-relVel.xyz*mass2fract;
velocity2.xyz = cmVel.xyz+relVel.xyz*mass1fract;
velm[particles.x] = velocity1;
velm[particles.y] = velocity2;
posDelta[particles.x] = (mixed4) (stepSize*velocity1.x, stepSize*velocity1.y, stepSize*velocity1.z, 0);
posDelta[particles.y] = (mixed4) (stepSize*velocity2.x, stepSize*velocity2.y, stepSize*velocity2.z, 0);
}
}
/**
 * Second stage of the Drude Langevin step: add the displacement stored in
 * posDelta to each particle's position and set its velocity to
 * displacement / step size. Particles whose velm.w is zero are left untouched.
 *
 * @param posq            positions in xyz; updated in place
 * @param posqCorrection  used only with USE_MIXED_PRECISION: correction added to
 *                        posq to form the higher-precision position
 * @param posDelta        displacement of each particle for this step
 * @param velm            velocities in xyz; w is preserved
 * @param dt              dt[0].y is read as the step size
 */
__kernel void integrateDrudeLangevinPart2(__global real4* restrict posq, __global real4* restrict posqCorrection, __global const mixed4* restrict posDelta, __global mixed4* restrict velm, __global const mixed2* restrict dt) {
    // Invert the step size once, in double precision when it is available.
#ifdef SUPPORTS_DOUBLE_PRECISION
    double invStepSize = 1.0/dt[0].y;
#else
    float invStepSize = 1.0f/dt[0].y;
#endif
    for (int atom = get_global_id(0); atom < NUM_ATOMS; atom += get_global_size(0)) {
        mixed4 velocity = velm[atom];
        if (velocity.w == 0.0)
            continue;

        // Load the current position, combining it with the correction term
        // in mixed precision mode.
#ifdef USE_MIXED_PRECISION
        real4 basePos = posq[atom];
        real4 corrPos = posqCorrection[atom];
        mixed4 position = (mixed4) (basePos.x+(mixed)corrPos.x, basePos.y+(mixed)corrPos.y, basePos.z+(mixed)corrPos.z, basePos.w);
#else
        real4 position = posq[atom];
#endif

        mixed4 displacement = posDelta[atom];
        position.xyz += displacement.xyz;
#ifdef SUPPORTS_DOUBLE_PRECISION
        velocity.xyz = convert_mixed4(invStepSize*convert_double4(displacement)).xyz;
#else
        velocity.xyz = invStepSize*displacement.xyz;
#endif

        // Store the position, splitting it back into value + correction
        // in mixed precision mode.
#ifdef USE_MIXED_PRECISION
        posq[atom] = convert_real4(position);
        posqCorrection[atom] = (real4) (position.x-(real) position.x, position.y-(real) position.y, position.z-(real) position.z, 0);
#else
        posq[atom] = position;
#endif
        velm[atom] = velocity;
    }
}
/**
* Apply hard wall constraints
*
* For every pair in pairParticles whose separation exceeds maxDrudeDistance,
* the positions and the velocity components along the pair axis are modified
* so that the separation "bounces" off the wall. Velocity components
* perpendicular to the pair axis are kept.
*
* @param posq                positions in xyz; updated in place
* @param posqCorrection      used only with USE_MIXED_PRECISION: correction added to posq
* @param velm                velocities in xyz; w is used as the inverse mass
* @param pairParticles       index pairs (x, y) to check
* @param dt                  dt[0].y is read as the step size
* @param maxDrudeDistance    largest allowed separation within a pair
* @param hardwallscaleDrude  scale of the speed given along the pair axis; it is
*                            divided by sqrt(mass1) below
*/
__kernel void applyHardWallConstraints(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm,
__global const int2* restrict pairParticles, __global const mixed2* restrict dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
mixed stepSize = dt[0].y;
for (int i = get_global_id(0); i < NUM_PAIRS; i += get_global_size(0)) {
int2 particles = pairParticles[i];
#ifdef USE_MIXED_PRECISION
real4 posReal1 = posq[particles.x];
real4 posReal2 = posq[particles.y];
real4 posCorr1 = posqCorrection[particles.x];
real4 posCorr2 = posqCorrection[particles.y];
mixed4 pos1 = (mixed4) (posReal1.x+(mixed)posCorr1.x, posReal1.y+(mixed)posCorr1.y, posReal1.z+(mixed)posCorr1.z, posReal1.w);
mixed4 pos2 = (mixed4) (posReal2.x+(mixed)posCorr2.x, posReal2.y+(mixed)posCorr2.y, posReal2.z+(mixed)posCorr2.z, posReal2.w);
#else
mixed4 pos1 = posq[particles.x];
mixed4 pos2 = posq[particles.y];
#endif
mixed4 delta = pos1-pos2;
mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
mixed rInv = 1/r;
// Equivalent to r > maxDrudeDistance.
if (rInv*maxDrudeDistance < 1) {
// The constraint has been violated, so make the inter-particle distance "bounce"
// off the hard wall.
mixed4 bondDir = delta*rInv;
mixed4 vel1 = velm[particles.x];
mixed4 vel2 = velm[particles.y];
mixed mass1 = 1/vel1.w;
mixed mass2 = 1/vel2.w;
// deltaR is how far the pair is beyond the wall.
mixed deltaR = r-maxDrudeDistance;
mixed deltaT = stepSize;
// Split the velocity of particle 1 into the part along the pair axis (vb1)
// and the part perpendicular to it (vp1).
mixed dotvr1 = vel1.x*bondDir.x + vel1.y*bondDir.y + vel1.z*bondDir.z;
mixed4 vb1 = bondDir*dotvr1;
mixed4 vp1 = vel1-vb1;
if (vel2.w == 0) {
// The parent particle is massless, so move only the Drude particle.
// NOTE(review): the test is on a zero inverse mass (mass is computed as
// 1/w above), so "massless" presumably means "treated as immovable" -- confirm.
if (dotvr1 != 0)
deltaT = deltaR/fabs(dotvr1);
if (deltaT > stepSize)
deltaT = stepSize;
// Replace the axial speed by hardwallscaleDrude/sqrt(mass1), with the sign
// opposite to the current axial velocity.
// NOTE(review): dotvr1/fabs(dotvr1) is not guarded against dotvr1 == 0
// (only deltaT is), which would produce NaN -- confirm this cannot occur.
dotvr1 = -dotvr1*hardwallscaleDrude/(fabs(dotvr1)*sqrt(mass1));
mixed dr = -deltaR + deltaT*dotvr1;
pos1.xyz += bondDir.xyz*dr;
#ifdef USE_MIXED_PRECISION
posq[particles.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posqCorrection[particles.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
#else
posq[particles.x] = pos1;
#endif
vel1.xyz = vp1.xyz + bondDir.xyz*dotvr1;
velm[particles.x] = vel1;
}
else {
// Move both particles.
mixed invTotalMass = 1/(mass1+mass2);
mixed dotvr2 = vel2.x*bondDir.x + vel2.y*bondDir.y + vel2.z*bondDir.z;
mixed4 vb2 = bondDir*dotvr2;
mixed4 vp2 = vel2-vb2;
// Work relative to the center-of-mass velocity along the pair axis; it is
// added back after the bounce.
mixed vbCMass = (mass1*dotvr1 + mass2*dotvr2)*invTotalMass;
dotvr1 -= vbCMass;
dotvr2 -= vbCMass;
if (dotvr1 != dotvr2)
deltaT = deltaR/fabs(dotvr1-dotvr2);
if (deltaT > stepSize)
deltaT = stepSize;
mixed vBond = hardwallscaleDrude/sqrt(mass1);
// NOTE(review): as in the branch above, the x/fabs(x) normalisations are
// not guarded against a zero axial velocity -- confirm this cannot occur.
dotvr1 = -dotvr1*vBond*mass2*invTotalMass/fabs(dotvr1);
dotvr2 = -dotvr2*vBond*mass1*invTotalMass/fabs(dotvr2);
// The position correction is shared between the particles in proportion
// to the other particle's mass.
mixed dr1 = -deltaR*mass2*invTotalMass + deltaT*dotvr1;
mixed dr2 = deltaR*mass1*invTotalMass + deltaT*dotvr2;
dotvr1 += vbCMass;
dotvr2 += vbCMass;
pos1.xyz += bondDir.xyz*dr1;
pos2.xyz += bondDir.xyz*dr2;
#ifdef USE_MIXED_PRECISION
posq[particles.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
posq[particles.y] = (real4) ((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
posqCorrection[particles.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
posqCorrection[particles.y] = (real4) (pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
#else
posq[particles.x] = pos1;
posq[particles.y] = pos2;
#endif
vel1.xyz = vp1.xyz + bondDir.xyz*dotvr1;
vel2.xyz = vp2.xyz + bondDir.xyz*dotvr2;
velm[particles.x] = vel1;
velm[particles.y] = vel2;
}
}
}
}
// Screened pair interaction between two groups of two particles
// (pos1, pos2) and (pos3, pos4).
// This fragment has no enclosing function of its own: it reads pos1..pos4,
// index and PARAMS, accumulates into energy, and leaves its results in
// force1..force4, all of which must be provided/consumed by the surrounding
// code (presumably a generated kernel -- confirm).
// drudeParams.x scales the distance to give u = x*r, and drudeParams.y is the
// prefactor of the energy. Each of the four cross pairs contributes
//     +/- y * (1 - (1 + u/2)*exp(-u)) / r
// with a plus sign for pairs 1-3 and 2-4 and a minus sign for pairs 1-4 and 2-3.
float2 drudeParams = PARAMS[index];
real4 force1 = 0;
real4 force2 = 0;
real4 force3 = 0;
real4 force4 = 0;
// First pair.
// Particles 1 and 3, positive sign.
real4 delta = (real4) (pos1.xyz-pos3.xyz, 0);
real rInv = RSQRT(dot(delta, delta));
real r = RECIP(rInv);
real u = drudeParams.x*r;
real screening = 1-(1+0.5f*u)*EXP(-u);
real pairEnergy = drudeParams.y*screening*rInv;
energy += pairEnergy;
real4 f = delta*(drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
// Equal and opposite contributions to the two particles of the pair.
force1 += f;
force3 -= f;
// Second pair.
// Particles 1 and 4, negative sign.
delta = (real4) (pos1.xyz-pos4.xyz, 0);
rInv = RSQRT(dot(delta, delta));
r = RECIP(rInv);
u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = -drudeParams.y*screening*rInv;
energy += pairEnergy;
f = delta*(-drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
force1 += f;
force4 -= f;
// Third pair.
// Particles 2 and 3, negative sign.
delta = (real4) (pos2.xyz-pos3.xyz, 0);
rInv = RSQRT(dot(delta, delta));
r = RECIP(rInv);
u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = -drudeParams.y*screening*rInv;
energy += pairEnergy;
f = delta*(-drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
force2 += f;
force3 -= f;
// Fourth pair.
// Particles 2 and 4, positive sign.
delta = (real4) (pos2.xyz-pos4.xyz, 0);
rInv = RSQRT(dot(delta, delta));
r = RECIP(rInv);
u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = drudeParams.y*screening*rInv;
energy += pairEnergy;
f = delta*(drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
force2 += f;
force4 -= f;
// Harmonic interaction between particles 1 and 2, with optional extra terms
// along two directions defined by other particles.
// This fragment has no enclosing function of its own: it reads pos1..pos5,
// index and PARAMS, accumulates into energy, and leaves its results in
// force1..force5, all of which must be provided/consumed by the surrounding
// code (presumably a generated kernel -- confirm).
// The energy is
//     0.5*k3*|d|^2 + 0.5*k1*(d . a)^2 + 0.5*k2*(d . b)^2
// where d = pos1 - pos2, a is the unit vector along pos2 - pos3 and b is the
// unit vector along pos4 - pos5.
real4 delta = (real4) (pos1.xyz-pos2.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float4 drudeParams = PARAMS[index];
// Only x, y and z of the parameter vector are used here.
float k1 = drudeParams.x;
float k2 = drudeParams.y;
float k3 = drudeParams.z;
// Compute the isotropic force.
energy += 0.5f*k3*r2;
real4 force1 = -delta*k3;
real4 force2 = delta*k3;
real4 force3 = 0;
real4 force4 = 0;
real4 force5 = 0;
// Compute the first anisotropic force.
// Skipped entirely when k1 is zero, so pos3 is not used in that case.
if (k1 != 0) {
real4 dir = (real4) (pos2.xyz-pos3.xyz, 0);
real invDist = RSQRT(dot(dir, dir));
dir *= invDist;
// rprime is the component of delta along dir.
real rprime = dot(dir, delta);
energy += 0.5f*k1*rprime*rprime;
// f1 comes from the dependence on delta, f2 from the dependence on the
// direction itself. Particle 2 takes part in both.
real4 f1 = dir*(k1*rprime);
real4 f2 = (delta-dir*rprime)*(k1*rprime*invDist);
force1 -= f1;
force2 += f1-f2;
force3 += f2;
}
// Compute the second anisotropic force.
// Skipped entirely when k2 is zero, so pos4 and pos5 are not used in that case.
if (k2 != 0) {
real4 dir = (real4) (pos4.xyz-pos5.xyz, 0);
real invDist = RSQRT(dot(dir, dir));
dir *= invDist;
real rprime = dot(dir, delta);
energy += 0.5f*k2*rprime*rprime;
// Here the direction is defined by particles 4 and 5, so the f2 term acts
// on them rather than on particle 2.
real4 f1 = dir*(k2*rprime);
real4 f2 = (delta-dir*rprime)*(k2*rprime*invDist);
force1 -= f1;
force2 += f1;
force4 -= f2;
force5 += f2;
}
......@@ -5,6 +5,7 @@
# Turn on test support for this directory.
ENABLE_TESTING()
# Make the OpenCL headers visible to the test programs.
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
# Add the Drude plugin's tests directory to the include path
# (presumably for test headers shared between platforms -- confirm).
INCLUDE_DIRECTORIES(${OPENMM_DIR}/plugins/drude/tests)
# Automatically create tests using files named "Test*.cpp".
# Note that the glob below matches any .cpp file whose name contains "Test".
FILE(GLOB TEST_PROGS "*Test*.cpp")
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
//#include "ReferenceTests.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/DrudeForce.h"
#include "OpenCLPlatform.h"
#include "SimTKOpenMMUtilities.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
extern "C" OPENMM_EXPORT void registerDrudeOpenCLKernelFactories();
/**
 * Hook for platform-specific tests. This test program defines none, so the
 * body is intentionally empty.
 */
void runPlatformTests() {
}
#include "TestDrudeNoseHoover.h"
/**
 * Register the Drude OpenCL kernel factories and return the OpenCL platform.
 *
 * @param argc  number of command-line arguments
 * @param argv  command-line arguments; when argc > 1, argv[1] becomes the
 *              default value of the platform's "Precision" property
 * @return the Platform registered under the name "OpenCL"
 */
Platform& initializePlatform(int argc, char* argv[]) {
    registerDrudeOpenCLKernelFactories();
    Platform& platform = Platform::getPlatformByName("OpenCL");
    if (argc > 1) {
        platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
    }
    return platform;
}
......@@ -5,6 +5,7 @@ ENABLE_TESTING()
# Headers of the reference platform and of the core OpenMM API.
INCLUDE_DIRECTORIES(${OPENMM_DIR}/platforms/reference/include)
INCLUDE_DIRECTORIES(${OPENMM_DIR}/openmmapi/include/openmm)
INCLUDE_DIRECTORIES(${OPENMM_DIR}/platforms/reference/src)
# Add the Drude plugin's tests directory to the include path
# (presumably for test headers shared between platforms -- confirm).
INCLUDE_DIRECTORIES(${OPENMM_DIR}/plugins/drude/tests)
# Name of the shared Drude library target.
SET(SHARED_OPENMM_DRUDE_TARGET OpenMMDrude)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment