Commit ffab317b authored by Peter Eastman's avatar Peter Eastman
Browse files

Began OpenCL implementation of GBSA

parent efac00cf
...@@ -50,8 +50,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo ...@@ -50,8 +50,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcNonbondedForceKernel(name, platform, cl, context.getSystem()); return new OpenCLCalcNonbondedForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name()) // if (name == CalcCustomNonbondedForceKernel::Name())
// return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem()); // return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcGBSAOBCForceKernel::Name()) if (name == CalcGBSAOBCForceKernel::Name())
// return new OpenCLCalcGBSAOBCForceKernel(name, platform, cl); return new OpenCLCalcGBSAOBCForceKernel(name, platform, cl);
if (name == IntegrateVerletStepKernel::Name()) if (name == IntegrateVerletStepKernel::Name())
return new OpenCLIntegrateVerletStepKernel(name, platform, cl); return new OpenCLIntegrateVerletStepKernel(name, platform, cl);
if (name == IntegrateLangevinStepKernel::Name()) if (name == IntegrateLangevinStepKernel::Name())
......
...@@ -54,6 +54,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context ...@@ -54,6 +54,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context
} }
void OpenCLCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) { void OpenCLCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) {
cl.getNonbondedUtilities().computeInteractions();
cl.reduceBuffer(cl.getForceBuffers(), cl.getNumForceBuffers()); cl.reduceBuffer(cl.getForceBuffers(), cl.getNumForceBuffers());
cl.getNonbondedUtilities().prepareInteractions(); cl.getNonbondedUtilities().prepareInteractions();
} }
...@@ -67,6 +68,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& contex ...@@ -67,6 +68,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& contex
} }
double OpenCLCalcForcesAndEnergyKernel::finishEnergyComputation(ContextImpl& context) { double OpenCLCalcForcesAndEnergyKernel::finishEnergyComputation(ContextImpl& context) {
cl.getNonbondedUtilities().computeInteractions();
OpenCLArray<cl_float>& energy = cl.getEnergyBuffer(); OpenCLArray<cl_float>& energy = cl.getEnergyBuffer();
energy.download(); energy.download();
double sum = 0.0f; double sum = 0.0f;
...@@ -584,7 +586,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -584,7 +586,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method); // gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
string source = cl.loadSourceFromFile("coulombLennardJones.cl", defines); string source = cl.loadSourceFromFile("coulombLennardJones.cl", defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getCutoffDistance(), exclusionList, source); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getCutoffDistance(), exclusionList, source);
cl.getNonbondedUtilities().addParameter("sigmaEpsilon", "float2", 8, sigmaEpsilon->getDeviceBuffer()); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float2", 8, sigmaEpsilon->getDeviceBuffer()));
cutoffSquared = force.getCutoffDistance()*force.getCutoffDistance(); cutoffSquared = force.getCutoffDistance()*force.getCutoffDistance();
// Compute the Ewald self energy. // Compute the Ewald self energy.
...@@ -629,7 +631,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -629,7 +631,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
} }
void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) { void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
cl.getNonbondedUtilities().computeInteractions();
if (exceptionIndices != NULL) { if (exceptionIndices != NULL) {
int numExceptions = exceptionIndices->getSize(); int numExceptions = exceptionIndices->getSize();
exceptionsKernel.setArg<cl_int>(0, cl.getPaddedNumAtoms()); exceptionsKernel.setArg<cl_int>(0, cl.getPaddedNumAtoms());
...@@ -766,90 +767,104 @@ double OpenCLCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) { ...@@ -766,90 +767,104 @@ double OpenCLCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
// SetCustomNonbondedGlobalParams(&globalParamValues[0]); // SetCustomNonbondedGlobalParams(&globalParamValues[0]);
//} //}
// //
//OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() { OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() {
//} if (params != NULL)
// delete params;
//void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) { if (bornSum != NULL)
// delete bornSum;
// int numParticles = system.getNumParticles(); if (bornRadii != NULL)
// _gpuContext* gpu = data.gpu; delete bornRadii;
// vector<float> radius(numParticles); if (bornForce != NULL)
// vector<float> scale(numParticles); delete bornForce;
// vector<float> charge(numParticles); if (obcChain != NULL)
// for (int i = 0; i < numParticles; i++) { delete obcChain;
// double particleCharge, particleRadius, scalingFactor; }
// force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor);
// radius[i] = (float) particleRadius; void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
// scale[i] = (float) scalingFactor;
// charge[i] = (float) particleCharge; params = new OpenCLArray<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
// } bornRadii = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "bornRadii");
// gpuSetObcParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), radius, scale, charge); bornForce = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "bornForce");
//} obcChain = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "obcChain");
// OpenCLArray<mm_float4>& posq = cl.getPosq();
//void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) { int numParticles = force.getNumParticles();
//} vector<mm_float2> paramsVector(numParticles);
// const double dielectricOffset = 0.009;
//static void initializeIntegration(const System& system, OpenCLPlatform::PlatformData& data, const Integrator& integrator) { for (int i = 0; i < numParticles; i++) {
// double charge, radius, scalingFactor;
// // Initialize any terms that haven't already been handled by a Force. force.getParticleParameters(i, charge, radius, scalingFactor);
// radius -= dielectricOffset;
// _gpuContext* gpu = data.gpu; paramsVector[i] = (mm_float2) {(float) radius, (float) (scalingFactor*radius)};
// if (!data.hasBonds) posq[i].w = (float) charge;
// gpuSetBondParameters(gpu, vector<int>(), vector<int>(), vector<float>(), vector<float>()); }
// if (!data.hasAngles) posq.upload();
// gpuSetBondAngleParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>()); params->upload(paramsVector);
// if (!data.hasPeriodicTorsions) prefactor = 2.0*-166.02691*0.4184*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// gpuSetDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<int>()); }
// if (!data.hasRB)
// gpuSetRbDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(), void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
// vector<float>(), vector<float>(), vector<float>(), vector<float>()); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
// if (!data.hasNonbonded) { if (bornSum == NULL) {
// gpuSetCoulombParameters(gpu, 138.935485f, vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<char>(), vector<vector<int> >(), NO_CUTOFF); // These objects cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then.
// gpuSetLJ14Parameters(gpu, 138.935485f, 1.0f, vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<float>());
// } bornSum = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*cl.getNumForceBuffers(), "bornSum");
// map<string, string> defines;
// // Set masses. if (nb.getForceBufferPerAtomBlock())
// defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
// int numParticles = system.getNumParticles(); if (nb.getUseCutoff())
// vector<float> mass(numParticles); defines["USE_CUTOFF"] = "1";
// for (int i = 0; i < numParticles; i++) if (nb.getUsePeriodic())
// mass[i] = (float) system.getParticleMass(i); defines["USE_PERIODIC"] = "1";
// gpuSetMass(gpu, mass); cl::Program program = cl.createProgram(cl.loadSourceFromFile("gbsaObc.cl"), defines);
// computeBornSumKernel = cl::Kernel(program, "computeBornSum");
// // Set constraints. reduceBornSumKernel = cl::Kernel(program, "reduceBornSum");
// }
// int numConstraints = system.getNumConstraints(); cl.clearBuffer(*bornSum);
// vector<int> particle1(numConstraints); cl.clearBuffer(*bornForce);
// vector<int> particle2(numConstraints);
// vector<float> distance(numConstraints); // Compute the Born sum.
// vector<float> invMass1(numConstraints);
// vector<float> invMass2(numConstraints); computeBornSumKernel.setArg<cl_int>(0, cl.getNumAtoms());
// for (int i = 0; i < numConstraints; i++) { computeBornSumKernel.setArg<cl_int>(1, cl.getPaddedNumAtoms());
// int particle1Index, particle2Index; computeBornSumKernel.setArg<cl::Buffer>(2, bornSum->getDeviceBuffer());
// double constraintDistance; computeBornSumKernel.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
// system.getConstraintParameters(i, particle1Index, particle2Index, constraintDistance); computeBornSumKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
// particle1[i] = particle1Index; computeBornSumKernel.setArg(5, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
// particle2[i] = particle2Index; computeBornSumKernel.setArg<cl::Buffer>(6, params->getDeviceBuffer());
// distance[i] = (float) constraintDistance; computeBornSumKernel.setArg(7, OpenCLContext::ThreadBlockSize*sizeof(cl_float2), NULL);
// invMass1[i] = 1.0f/mass[particle1Index]; if (nb.getUseCutoff()) {
// invMass2[i] = 1.0f/mass[particle2Index]; computeBornSumKernel.setArg<cl::Buffer>(8, nb.getInteractingTiles().getDeviceBuffer());
// } computeBornSumKernel.setArg<cl_float>(9, nb.getCutoffDistance()*nb.getCutoffDistance());
// gpuSetConstraintParameters(gpu, particle1, particle2, distance, invMass1, invMass2, (float)integrator.getConstraintTolerance()); computeBornSumKernel.setArg<mm_float4>(10, nb.getPeriodicBoxSize());
// computeBornSumKernel.setArg<cl::Buffer>(11, nb.getInteractionFlags().getDeviceBuffer());
// // Finish initialization. computeBornSumKernel.setArg<cl::Buffer>(12, nb.getInteractionCount().getDeviceBuffer());
// computeBornSumKernel.setArg(13, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
// gpuBuildThreadBlockWorkList(gpu); }
// gpuBuildExclusionList(gpu); else {
// gpuBuildOutputBuffers(gpu); computeBornSumKernel.setArg<cl::Buffer>(8, nb.getTiles().getDeviceBuffer());
// gpuSetConstants(gpu); computeBornSumKernel.setArg<cl_uint>(9, nb.getTiles().getSize());
// kClearBornForces(gpu); }
// kClearForces(gpu); cl.executeKernel(computeBornSumKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
// cudaThreadSynchronize();
//} // Reduce the Born sum.
//
//double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) { reduceBornSumKernel.setArg<cl_int>(0, cl.getNumAtoms());
// return 0.0; reduceBornSumKernel.setArg<cl_int>(1, cl.getPaddedNumAtoms());
//} reduceBornSumKernel.setArg<cl_int>(2, cl.getNumForceBuffers());
reduceBornSumKernel.setArg<cl_float>(3, 1.0f);
reduceBornSumKernel.setArg<cl_float>(4, 0.8f);
reduceBornSumKernel.setArg<cl_float>(5, 4.85f);
reduceBornSumKernel.setArg<cl::Buffer>(6, bornSum->getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(7, params->getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(8, bornRadii->getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(9, obcChain->getDeviceBuffer());
cl.executeKernel(reduceBornSumKernel, cl.getPaddedNumAtoms());
}
double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() { OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
} }
......
...@@ -368,38 +368,46 @@ private: ...@@ -368,38 +368,46 @@ private:
// std::vector<float> globalParamValues; // std::vector<float> globalParamValues;
// System& system; // System& system;
//}; //};
//
///** /**
// * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system. * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
// */ */
//class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel { class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
//public: public:
// OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl) { OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl), bornSum(NULL) {
// } }
// ~OpenCLCalcGBSAOBCForceKernel(); ~OpenCLCalcGBSAOBCForceKernel();
// /** /**
// * Initialize the kernel. * Initialize the kernel.
// * *
// * @param system the System this kernel will be applied to * @param system the System this kernel will be applied to
// * @param force the GBSAOBCForce this kernel will be used for * @param force the GBSAOBCForce this kernel will be used for
// */ */
// void initialize(const System& system, const GBSAOBCForce& force); void initialize(const System& system, const GBSAOBCForce& force);
// /** /**
// * Execute the kernel to calculate the forces. * Execute the kernel to calculate the forces.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// */ */
// void executeForces(ContextImpl& context); void executeForces(ContextImpl& context);
// /** /**
// * Execute the kernel to calculate the energy. * Execute the kernel to calculate the energy.
// * *
// * @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
// * @return the potential energy due to the GBSAOBCForce * @return the potential energy due to the GBSAOBCForce
// */ */
// double executeEnergy(ContextImpl& context); double executeEnergy(ContextImpl& context);
//private: private:
// OpenCLContext& cl; double prefactor;
//}; OpenCLContext& cl;
OpenCLArray<mm_float2>* params;
OpenCLArray<cl_float>* bornSum;
OpenCLArray<cl_float>* bornRadii;
OpenCLArray<cl_float>* bornForce;
OpenCLArray<cl_float>* obcChain;
cl::Kernel computeBornSumKernel;
cl::Kernel reduceBornSumKernel;
};
/** /**
* This kernel is invoked by VerletIntegrator to take one time step. * This kernel is invoked by VerletIntegrator to take one time step.
......
...@@ -96,8 +96,8 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic ...@@ -96,8 +96,8 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
} }
} }
void OpenCLNonbondedUtilities::addParameter(const string& name, const string& type, int size, cl::Buffer& buffer) { void OpenCLNonbondedUtilities::addParameter(const ParameterInfo& parameter) {
parameters.push_back(ParameterInfo(name, type, size, buffer)); parameters.push_back(parameter);
} }
void OpenCLNonbondedUtilities::initialize(const System& system) { void OpenCLNonbondedUtilities::initialize(const System& system) {
...@@ -117,50 +117,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -117,50 +117,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create kernels. // Create kernels.
map<string, string> replacements; forceKernel = createInteractionKernel(kernelSource, parameters, true);
replacements["COMPUTE_INTERACTION"] = kernelSource;
stringstream args;
for (int i = 0; i < parameters.size(); i++) {
args << ", __global ";
args << parameters[i].type;
args << "* global_";
args << parameters[i].name;
args << ", __local ";
args << parameters[i].type;
args << "* local_";
args << parameters[i].name;
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1;
for (int i = 0; i < parameters.size(); i++) {
load1 << parameters[i].type;
load1 << " ";
load1 << parameters[i].name;
load1 << "1 = global_";
load1 << parameters[i].name;
load1 << "[i];";
}
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
stringstream load2j;
for (int i = 0; i < parameters.size(); i++) {
load2j << parameters[i].type;
load2j << " ";
load2j << parameters[i].name;
load2j << "2 = local_";
load2j << parameters[i].name;
load2j << "[tbx+j];";
}
replacements["LOAD_ATOM2_PARAMETERS_J"] = load2j.str();
stringstream load2tj;
for (int i = 0; i < parameters.size(); i++) {
load2tj << parameters[i].type;
load2tj << " ";
load2tj << parameters[i].name;
load2tj << "2 = local_";
load2tj << parameters[i].name;
load2tj << "[tbx+tj];";
}
replacements["LOAD_ATOM2_PARAMETERS_TJ"] = load2tj.str();
map<string, string> defines; map<string, string> defines;
if (forceBufferPerAtomBlock) if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
...@@ -168,8 +125,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -168,8 +125,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (usePeriodic) if (usePeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
cl::Program forceProgram = context.createProgram(context.loadSourceFromFile("nonbonded.cl", replacements), defines);
forceKernel = cl::Kernel(forceProgram, "computeNonbonded");
cl::Program interactingBlocksProgram = context.createProgram(context.loadSourceFromFile("findInteractingBlocks.cl"), defines); cl::Program interactingBlocksProgram = context.createProgram(context.loadSourceFromFile("findInteractingBlocks.cl"), defines);
findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds"); findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions"); findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
...@@ -271,7 +226,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -271,7 +226,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
} }
void OpenCLNonbondedUtilities::prepareInteractions() { void OpenCLNonbondedUtilities::prepareInteractions() {
hasComputedInteractions = false;
if (!useCutoff) if (!useCutoff)
return; return;
...@@ -308,34 +262,113 @@ void OpenCLNonbondedUtilities::prepareInteractions() { ...@@ -308,34 +262,113 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
} }
void OpenCLNonbondedUtilities::computeInteractions() { void OpenCLNonbondedUtilities::computeInteractions() {
if (hasComputedInteractions) invokeInteractionKernel(forceKernel, parameters);
return; }
hasComputedInteractions = true;
forceKernel.setArg<cl_int>(0, context.getPaddedNumAtoms()); cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo> params, bool useExclusions) const {
forceKernel.setArg<cl::Buffer>(1, context.getForceBuffers().getDeviceBuffer()); map<string, string> replacements;
forceKernel.setArg<cl::Buffer>(2, context.getEnergyBuffer().getDeviceBuffer()); replacements["COMPUTE_INTERACTION"] = source;
forceKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer()); stringstream args;
forceKernel.setArg<cl::Buffer>(4, exclusions->getDeviceBuffer()); for (int i = 0; i < params.size(); i++) {
forceKernel.setArg<cl::Buffer>(5, exclusionIndex->getDeviceBuffer()); args << ", __global ";
forceKernel.setArg(6, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL); args << params[i].getType();
forceKernel.setArg(7, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL); args << "* global_";
args << params[i].getName();
args << ", __local ";
args << params[i].getType();
args << "* local_";
args << params[i].getName();
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
// local_sigmaEpsilon[get_local_id(0)] = sigmaEpsilon1;
stringstream loadLocal1;
for (int i = 0; i < params.size(); i++) {
loadLocal1 << "local_";
loadLocal1 << params[i].getName();
loadLocal1 << "[get_local_id(0)] = ";
loadLocal1 << params[i].getName();
loadLocal1 << "1;";
}
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
// local_sigmaEpsilon[get_local_id(0)] = global_sigmaEpsilon[j];
stringstream loadLocal2;
for (int i = 0; i < params.size(); i++) {
loadLocal2 << "local_";
loadLocal2 << params[i].getName();
loadLocal2 << "[get_local_id(0)] = global_";
loadLocal2 << params[i].getName();
loadLocal2 << "[j];";
}
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
stringstream load1;
for (int i = 0; i < params.size(); i++) {
load1 << params[i].getType();
load1 << " ";
load1 << params[i].getName();
load1 << "1 = global_";
load1 << params[i].getName();
load1 << "[i];";
}
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
stringstream load2j;
for (int i = 0; i < params.size(); i++) {
load2j << params[i].getType();
load2j << " ";
load2j << params[i].getName();
load2j << "2 = local_";
load2j << params[i].getName();
load2j << "[tbx+j];";
}
replacements["LOAD_ATOM2_PARAMETERS_J"] = load2j.str();
stringstream load2tj;
for (int i = 0; i < params.size(); i++) {
load2tj << params[i].getType();
load2tj << " ";
load2tj << params[i].getName();
load2tj << "2 = local_";
load2tj << params[i].getName();
load2tj << "[tbx+tj];";
}
replacements["LOAD_ATOM2_PARAMETERS_TJ"] = load2tj.str();
map<string, string> defines;
if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
if (useExclusions)
defines["USE_EXCLUSIONS"] = "1";
cl::Program program = context.createProgram(context.loadSourceFromFile("nonbonded.cl", replacements), defines);
return cl::Kernel(program, "computeNonbonded");
}
void OpenCLNonbondedUtilities::invokeInteractionKernel(cl::Kernel kernel, const vector<ParameterInfo>& params) {
kernel.setArg<cl_int>(0, context.getPaddedNumAtoms());
kernel.setArg<cl::Buffer>(1, context.getForceBuffers().getDeviceBuffer());
kernel.setArg<cl::Buffer>(2, context.getEnergyBuffer().getDeviceBuffer());
kernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(4, exclusions->getDeviceBuffer());
kernel.setArg<cl::Buffer>(5, exclusionIndex->getDeviceBuffer());
kernel.setArg(6, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
kernel.setArg(7, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
int paramBase = 10; int paramBase = 10;
if (useCutoff) { if (useCutoff) {
paramBase = 14; paramBase = 14;
forceKernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_float>(9, cutoff*cutoff); kernel.setArg<cl_float>(9, cutoff*cutoff);
forceKernel.setArg<mm_float4>(10, periodicBoxSize); kernel.setArg<mm_float4>(10, periodicBoxSize);
forceKernel.setArg<cl::Buffer>(11, interactionFlags->getDeviceBuffer()); kernel.setArg<cl::Buffer>(11, interactionFlags->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(12, interactionCount->getDeviceBuffer()); kernel.setArg<cl::Buffer>(12, interactionCount->getDeviceBuffer());
forceKernel.setArg(13, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL); kernel.setArg(13, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
} }
else { else {
forceKernel.setArg<cl::Buffer>(8, tiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(8, tiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(9, tiles->getSize()); kernel.setArg<cl_uint>(9, tiles->getSize());
} }
for (int i = 0; i < (int) parameters.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
forceKernel.setArg<cl::Buffer>(i*2+paramBase, *parameters[i].buffer); kernel.setArg<cl::Buffer>(i*2+paramBase, params[i].getBuffer());
forceKernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*parameters[i].size, NULL); kernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*params[i].getSize(), NULL);
} }
context.executeKernel(forceKernel, tiles->getSize()*OpenCLContext::TileSize); context.executeKernel(kernel, tiles->getSize()*OpenCLContext::TileSize);
} }
...@@ -37,16 +37,37 @@ namespace OpenMM { ...@@ -37,16 +37,37 @@ namespace OpenMM {
class OpenCLCompact; class OpenCLCompact;
/** /**
* This class implements features that are used by several different force. It provides * This class provides a generic interface for calculating nonbonded interactions. It does this in two
* a generic interface for calculating nonbonded interactions. * ways. First, it can be used to create and invoke Kernels that evaluate nonbonded interactions. Clients
* only need to provide the code for evaluating a single interaction and the list of parameters it depends on.
* A complete kernel is then synthesized using an appropriate algorithm to evaluate all interactions on all
* atoms.
*
* Second, this class itself creates and invokes a single "default" interaction kernel, allowing several
* different forces to be evaluated at once for greater efficiency. Call addInteraction() and addParameter()
* to add interactions to this default kernel.
*
* During each force or energy evaluation, the following sequence of steps takes place:
*
* 1. Data structures (e.g. neighbor lists) are calculated to allow nonbonded interactions to be evaluated
* quickly.
*
* 2. calcForces() or calcEnergy() is called on each ForceImpl in the System.
*
* 3. Finally, the default interaction kernel is invoked to calculate all interactions that were added
* to it.
*
* This sequence means that the default interaction kernel may depend on quantities that were calculated
* by ForceImpls during calcForces() or calcEnergy().
*/ */
class OpenCLNonbondedUtilities { class OpenCLNonbondedUtilities {
public: public:
class ParameterInfo;
OpenCLNonbondedUtilities(OpenCLContext& context); OpenCLNonbondedUtilities(OpenCLContext& context);
~OpenCLNonbondedUtilities(); ~OpenCLNonbondedUtilities();
/** /**
* Add a nonbonded interaction. * Add a nonbonded interaction to be evaluated by the default interaction kernel.
* *
* @param usesCutoff specifies whether a cutoff should be applied to this interaction * @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction * @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
...@@ -56,14 +77,9 @@ public: ...@@ -56,14 +77,9 @@ public:
*/ */
void addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel); void addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel);
/** /**
* Add a per-atom parameter that interactions may depend on. * Add a per-atom parameter that the default interaction kernel may depend on.
*
* @param name the name of the parameter
* @param type the data type of the parameter
* @param size the size of the parameter in bytes
* @param buffer the buffer containing the parameter values
*/ */
void addParameter(const std::string& name, const std::string& type, int size, cl::Buffer& buffer); void addParameter(const ParameterInfo& parameter);
/** /**
* Initialize this object in preparation for a simulation. * Initialize this object in preparation for a simulation.
*/ */
...@@ -86,6 +102,12 @@ public: ...@@ -86,6 +102,12 @@ public:
bool getUsePeriodic() { bool getUsePeriodic() {
return usePeriodic; return usePeriodic;
} }
/**
* Get whether there is one force buffer per atom block.
*/
bool getForceBufferPerAtomBlock() {
return forceBufferPerAtomBlock;
}
/** /**
* Get the cutoff distance. * Get the cutoff distance.
*/ */
...@@ -143,8 +165,24 @@ public: ...@@ -143,8 +165,24 @@ public:
OpenCLArray<cl_uint>& getInteractionFlags() { OpenCLArray<cl_uint>& getInteractionFlags() {
return *interactionFlags; return *interactionFlags;
} }
/**
* Create a Kernel for evaluating a nonbonded interaction. Cutoffs and periodic boundary conditions
* are assumed to be the same as those for the default interaction Kernel, since this kernel will use
* the same neighbor list.
*
* @param source the source code for evaluating the force and energy
* @param params the per-atom parameters this kernel may depend on
* @param useExclusions specifies whether exclusions are applied to this interaction
*/
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo> params, bool useExclusions) const;
/**
* Invoke a Kernel that was created by createInteractionKernel.
*
* @param kernel the Kernel to invoke
* @param params the per-atom parameters to pass to the Kernel
*/
void invokeInteractionKernel(cl::Kernel kernel, const std::vector<ParameterInfo>& params);
private: private:
class ParameterInfo;
OpenCLContext& context; OpenCLContext& context;
cl::Kernel forceKernel; cl::Kernel forceKernel;
cl::Kernel findBlockBoundsKernel; cl::Kernel findBlockBoundsKernel;
...@@ -164,16 +202,41 @@ private: ...@@ -164,16 +202,41 @@ private:
std::string kernelSource; std::string kernelSource;
std::map<std::string, std::string> kernelDefines; std::map<std::string, std::string> kernelDefines;
double cutoff; double cutoff;
bool useCutoff, usePeriodic, forceBufferPerAtomBlock, hasComputedInteractions; bool useCutoff, usePeriodic, forceBufferPerAtomBlock;
int numForceBuffers; int numForceBuffers;
mm_float4 periodicBoxSize; mm_float4 periodicBoxSize;
}; };
/**
* This class stores information about a per-atom parameter that may be used in a nonbonded kernel.
*/
class OpenCLNonbondedUtilities::ParameterInfo { class OpenCLNonbondedUtilities::ParameterInfo {
public: public:
/**
* Create a ParameterInfo object.
*
* @param name the name of the parameter
* @param type the data type of the parameter
* @param size the size of the parameter in bytes
* @param buffer the buffer containing the parameter values
*/
ParameterInfo(const std::string& name, const std::string& type, int size, cl::Buffer& buffer) : ParameterInfo(const std::string& name, const std::string& type, int size, cl::Buffer& buffer) :
name(name), type(type), size(size), buffer(&buffer) { name(name), type(type), size(size), buffer(&buffer) {
} }
const std::string& getName() const {
return name;
}
const std::string& getType() const {
return type;
}
int getSize() const {
return size;
}
cl::Buffer& getBuffer() const {
return *buffer;
}
private:
std::string name; std::string name;
std::string type; std::string type;
int size; int size;
......
...@@ -53,7 +53,7 @@ OpenCLPlatform::OpenCLPlatform() { ...@@ -53,7 +53,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory); registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcNonbondedForceKernel::Name(), factory); registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
// registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory); // registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
// registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory); registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory); registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
// registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory); // registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
......
const unsigned int TileSize = 32;
const float dielectricOffset = 0.009f;
/**
* Compute the Born sum.
*/
__kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* global_bornSum, __local float* local_bornSum, __global float4* posq,
__local float4* local_posq, __global float2* global_params, __local float2* local_params, __global unsigned int* tiles,
#ifdef USE_CUTOFF
float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local float* tempBuffer) {
#else
unsigned int numTiles) {
#endif
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
#endif
unsigned int totalWarps = get_global_size(0)/TileSize;
unsigned int warp = get_global_id(0)/TileSize;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
while (pos < end) {
// Extract the coordinates of this tile
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TileSize;
x = (x>>17)*TileSize;
unsigned int tgx = get_local_id(0) & (TileSize-1);
unsigned int tbx = get_local_id(0) - tgx;
unsigned int tj = tgx;
unsigned int i = x + tgx;
float bornSum = 0.0f;
float4 posq1 = posq[i];
float2 params1 = global_params[i];
if (x == y) {
// This tile is on the diagonal.
local_posq[get_local_id(0)] = posq1;
local_params[get_local_id(0)] = params1;
unsigned int xi = x/TileSize;
unsigned int tile = xi+xi*paddedNumAtoms/TileSize-xi*(xi+1)/2;
for (unsigned int j = 0; j < TileSize; j++) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (i < numAtoms && x+j < numAtoms && r2 < cutoffSquared) {
#else
if (i < numAtoms && x+j < numAtoms) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float2 params2 = local_params[tbx+j];
float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
float l_ij = 1.0f/max(params1.x, fabs(r-params2.y));
float u_ij = 1.0f/rScaledRadiusJ;
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = log(u_ij / l_ij);
bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
(0.25f*params2.y*params2.y*invR)*(l_ij2-u_ij2);
if (params1.x < params2.x-r)
bornSum += 2.0f*(1.0f/params1.x-l_ij);
}
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms;
#else
unsigned int offset = x + tgx + warp*paddedNumAtoms;
#endif
global_bornSum[offset] += bornSum;
}
else {
// This is an off-diagonal tile.
if (lasty != y) {
unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j];
local_params[get_local_id(0)] = global_params[j];
}
local_bornSum[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos];
if (flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TileSize; j++) {
if ((flags&(1<<j)) != 0) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (i < numAtoms && x+j < numAtoms && r2 < cutoffSquared) {
#else
if (i < numAtoms && x+j < numAtoms) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float2 params2 = local_params[tbx+j];
float rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
float l_ij = 1.0f/max(params1.x, fabs(r-params2.y));
float u_ij = 1.0f/rScaledRadiusJ;
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = log(u_ij / l_ij);
bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
(0.25f*params2.y*params2.y*invR)*(l_ij2-u_ij2);
if (params1.x < params2.x-r)
bornSum += 2.0f*(1.0f/params1.x-l_ij);
}
float rScaledRadiusI = r+params1.y;
if ((j != tgx) && (params2.x < rScaledRadiusJ)) {
float l_ij = 1.0f/max(params2.x, fabs(r-params1.y));
float u_ij = 1.0f/rScaledRadiusJ;
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = log(u_ij / l_ij);
float term = l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
(0.25f*params1.y*params1.y*invR)*(l_ij2-u_ij2);
if (params2.x < params1.x-r)
term += 2.0f*(1.0f/params2.x-l_ij);
tempBuffer[get_local_id(0)] = term;
}
}
// Sum the forces on atom j.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
if (tgx == 0)
local_bornSum[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned int xi = x/TileSize;
unsigned int yi = y/TileSize;
unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2;
for (unsigned int j = 0; j < TileSize; j++) {
float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (i < numAtoms && x+tj < numAtoms && r2 < cutoffSquared) {
#else
if (i < numAtoms && x+tj < numAtoms) {
#endif
float r = sqrt(r2);
float invR = 1.0f/r;
float2 params2 = local_params[tbx+tj];
float rScaledRadiusJ = r+params2.y;
if ((tj != tgx) && (params1.x < rScaledRadiusJ)) {
float l_ij = 1.0f/max(params1.x, fabs(r-params2.y));
float u_ij = 1.0f/rScaledRadiusJ;
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = log(u_ij / l_ij);
bornSum += l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
(0.25f*params2.y*params2.y*invR)*(l_ij2-u_ij2);
if (params1.x < params2.x-r)
bornSum += 2.0f*(1.0f/params1.x-l_ij);
}
float rScaledRadiusI = r+params1.y;
if ((tj != tgx) && (params2.x < rScaledRadiusJ)) {
float l_ij = 1.0f/max(params2.x, fabs(r-params1.y));
float u_ij = 1.0f/rScaledRadiusJ;
float l_ij2 = l_ij*l_ij;
float u_ij2 = u_ij*u_ij;
float ratio = log(u_ij / l_ij);
float term = l_ij - u_ij + 0.25f*r*(u_ij2-l_ij2) + (0.50f*invR*ratio) +
(0.25f*params1.y*params1.y*invR)*(l_ij2-u_ij2);
if (params2.x < params1.x-r)
term += 2.0f*(1.0f/params2.x-l_ij);
local_bornSum[tbx+tj] = term;
}
}
tj = (tj + 1) & (TileSize - 1);
}
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset1 = x + tgx + (y/TileSize)*paddedNumAtoms;
unsigned int offset2 = y + tgx + (x/TileSize)*paddedNumAtoms;
#else
unsigned int offset1 = x + tgx + warp*paddedNumAtoms;
unsigned int offset2 = y + tgx + warp*paddedNumAtoms;
#endif
global_bornSum[offset1] += bornSum;
global_bornSum[offset2] += local_bornSum[get_local_id(0)];
lasty = y;
}
pos++;
}
}
/**
* Reduce the Born sums to compute the Born radii.
*/
__kernel void reduceBornSum(int numAtoms, int bufferSize, int numBuffers, float alpha, float beta, float gamma,
__global float* bornSum, __global float2* params, __global float* bornRadii, __global float* obcChain) {
unsigned int index = get_global_id(0);
while (index < numAtoms) {
// Get summed Born data
int totalSize = bufferSize*numBuffers;
float sum = bornSum[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += bornSum[i];
bornSum[index] = sum;
// Now calculate Born radius and OBC term.
float offsetRadius = params[index].x;
sum *= 0.5f*offsetRadius;
float sum2 = sum*sum;
float sum3 = sum*sum2;
float tanhSum = tanh(alpha*sum - beta*sum2 + gamma*sum3);
float nonOffsetRadii = offsetRadius + dielectricOffset;
float radius = 1.0f/(1.0f/offsetRadius - tanhSum/nonOffsetRadii);
float chain = offsetRadius*(alpha - 2.0f*beta*sum + 3.0f*gamma*sum2);
chain = (1.0f-tanhSum*tanhSum)*chain / nonOffsetRadii;
bornRadii[index] = radius;
obcChain[index] = chain;
index += get_global_size(0);
}
}
...@@ -39,12 +39,16 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -39,12 +39,16 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
// This tile is on the diagonal. // This tile is on the diagonal.
local_posq[get_local_id(0)] = posq1; local_posq[get_local_id(0)] = posq1;
local_sigmaEpsilon[get_local_id(0)] = sigmaEpsilon1; LOAD_LOCAL_PARAMETERS_FROM_1
unsigned int xi = x/TileSize; unsigned int xi = x/TileSize;
unsigned int tile = xi+xi*paddedNumAtoms/TileSize-xi*(xi+1)/2; unsigned int tile = xi+xi*paddedNumAtoms/TileSize-xi*(xi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = exclusions[exclusionIndices[tile]+tgx]; unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
#endif
for (unsigned int j = 0; j < TileSize; j++) { for (unsigned int j = 0; j < TileSize; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
...@@ -59,33 +63,35 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -59,33 +63,35 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
#ifdef USE_CUTOFF #if defined USE_CUTOFF || defined USE_EXCLUSIONS
#if defined USE_CUTOFF && defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded || r2 > cutoffSquared) { if (isExcluded || r2 > cutoffSquared) {
#else #elif defined USE_CUTOFF
if (r2 > cutoffSquared) {
#elif defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded) { if (isExcluded) {
#endif #endif
dEdR = 0.0f; dEdR = 0.0f;
tempEnergy = 0.0f; tempEnergy = 0.0f;
} }
#endif
energy += 0.5f*tempEnergy; energy += 0.5f*tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
excl >>= 1;
} }
// Write results // Write results
float4 of; float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
of.xyz = force.xyz;
of.w = 0.0f;
unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms; unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms;
forceBuffers[offset] = of;
#else #else
unsigned int offset = x + tgx + warp*paddedNumAtoms; unsigned int offset = x + tgx + warp*paddedNumAtoms;
#endif
of = forceBuffers[offset]; of = forceBuffers[offset];
of.xyz += force.xyz; of.xyz += force.xyz;
forceBuffers[offset] = of; forceBuffers[offset] = of;
#endif
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -93,7 +99,7 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -93,7 +99,7 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
if (lasty != y) { if (lasty != y) {
unsigned int j = y + tgx; unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j]; local_posq[get_local_id(0)] = posq[j];
local_sigmaEpsilon[get_local_id(0)] = global_sigmaEpsilon[j]; LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
} }
local_force[get_local_id(0)] = 0.0f; local_force[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -157,10 +163,14 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -157,10 +163,14 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
unsigned int xi = x/TileSize; unsigned int xi = x/TileSize;
unsigned int yi = y/TileSize; unsigned int yi = y/TileSize;
unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2; unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF); unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
excl = (excl >> tgx) | (excl << (TileSize - tgx)); excl = (excl >> tgx) | (excl << (TileSize - tgx));
#endif
for (unsigned int j = 0; j < TileSize; j++) { for (unsigned int j = 0; j < TileSize; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif
float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f); float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
...@@ -175,19 +185,24 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -175,19 +185,24 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
#ifdef USE_CUTOFF #if defined USE_CUTOFF || defined USE_EXCLUSIONS
#if defined USE_CUTOFF && defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded || r2 > cutoffSquared) { if (isExcluded || r2 > cutoffSquared) {
#else #elif defined USE_CUTOFF
if (r2 > cutoffSquared) {
#elif defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded) { if (isExcluded) {
#endif #endif
dEdR = 0.0f; dEdR = 0.0f;
tempEnergy = 0.0f; tempEnergy = 0.0f;
} }
#endif
energy += tempEnergy; energy += tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz; local_force[tbx+tj].xyz += delta.xyz;
excl >>= 1;
tj = (tj + 1) & (TileSize - 1); tj = (tj + 1) & (TileSize - 1);
} }
} }
...@@ -195,23 +210,18 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -195,23 +210,18 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
// Write results // Write results
float4 of; float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
of.xyz = force.xyz; unsigned int offset1 = x + tgx + (y/TileSize)*paddedNumAtoms;
of.w = 0.0f; unsigned int offset2 = y + tgx + (x/TileSize)*paddedNumAtoms;
unsigned int offset = x + tgx + (y/TileSize)*paddedNumAtoms;
forceBuffers[offset] = of;
of = local_force[get_local_id(0)];
offset = y + tgx + (x/TileSize)*paddedNumAtoms;
forceBuffers[offset] = of;
#else #else
unsigned int offset = x + tgx + warp*paddedNumAtoms; unsigned int offset1 = x + tgx + warp*paddedNumAtoms;
of = forceBuffers[offset]; unsigned int offset2 = y + tgx + warp*paddedNumAtoms;
#endif
of = forceBuffers[offset1];
of.xyz += force.xyz; of.xyz += force.xyz;
forceBuffers[offset] = of; forceBuffers[offset1] = of;
offset = y + tgx + warp*paddedNumAtoms; of = forceBuffers[offset2];
of = forceBuffers[offset];
of.xyz += local_force[get_local_id(0)].xyz; of.xyz += local_force[get_local_id(0)].xyz;
forceBuffers[offset] = of; forceBuffers[offset2] = of;
#endif
lasty = y; lasty = y;
} }
pos++; pos++;
......
...@@ -4,12 +4,11 @@ ...@@ -4,12 +4,11 @@
__kernel void clearBuffer(__global float* buffer, int size) { __kernel void clearBuffer(__global float* buffer, int size) {
int index = get_global_id(0); int index = get_global_id(0);
int step = get_global_size(0);
__global float4* buffer4 = (__global float4*) buffer; __global float4* buffer4 = (__global float4*) buffer;
int sizeDiv4 = size/4; int sizeDiv4 = size/4;
while (index < sizeDiv4) { while (index < sizeDiv4) {
buffer4[index] = (float4) (0.0f); buffer4[index] = (float4) (0.0f);
index += step; index += get_global_size(0);
} }
if (get_global_id(0) == 0) if (get_global_id(0) == 0)
for (int i = sizeDiv4*4; i < size; i++) for (int i = sizeDiv4*4; i < size; i++)
...@@ -22,13 +21,12 @@ __kernel void clearBuffer(__global float* buffer, int size) { ...@@ -22,13 +21,12 @@ __kernel void clearBuffer(__global float* buffer, int size) {
__kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int numBuffers) { __kernel void reduceFloat4Buffer(__global float4* buffer, int bufferSize, int numBuffers) {
int index = get_global_id(0); int index = get_global_id(0);
int step = get_global_size(0);
int totalSize = bufferSize*numBuffers; int totalSize = bufferSize*numBuffers;
while (index < bufferSize) { while (index < bufferSize) {
float4 sum = buffer[index]; float4 sum = buffer[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize) for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += buffer[i]; sum += buffer[i];
buffer[index] = sum; buffer[index] = sum;
index += step; index += get_global_size(0);
} }
} }
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the OpenCL implementation of GBSAOBCForce.
*/
#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "OpenCLPlatform.h"
#include "ReferencePlatform.h"
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "../src/sfmt/SFMT.h"
#include "openmm/NonbondedForce.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testSingleParticle() {
OpenCLPlatform platform;
System system;
system.addParticle(2.0);
LangevinIntegrator integrator(0, 0.1, 0.01);
GBSAOBCForce* gbsa = new GBSAOBCForce();
NonbondedForce* nonbonded = new NonbondedForce();
gbsa->addParticle( 0.5, 0.15, 1);
nonbonded->addParticle(0.5, 1, 0);
system.addForce(gbsa);
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(1);
positions[0] = Vec3(0, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Energy);
double bornRadius = 0.15-0.009; // dielectric offset
double eps0 = EPSILON0;
double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/gbsa->getSoluteDielectric()-1.0/gbsa->getSolventDielectric())/bornRadius;
double extendedRadius = bornRadius+0.14; // probe radius
double nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.15/bornRadius, 6.0); // Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
// ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
}
void testCutoffAndPeriodic() {
OpenCLPlatform cl;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
LangevinIntegrator integrator(0, 0.1, 0.01);
GBSAOBCForce* gbsa = new GBSAOBCForce();
NonbondedForce* nonbonded = new NonbondedForce();
gbsa->addParticle(-1, 0.15, 1);
nonbonded->addParticle(-1, 1, 0);
gbsa->addParticle(1, 0.15, 1);
nonbonded->addParticle(1, 1, 0);
const double cutoffDistance = 3.0;
const double boxSize = 10.0;
nonbonded->setCutoffDistance(cutoffDistance);
gbsa->setCutoffDistance(cutoffDistance);
system.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
system.addForce(gbsa);
system.addForce(nonbonded);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(2, 0, 0);
// Calculate the forces for both cutoff and periodic with two different atom positions.
nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
Context context(system, integrator, cl);
context.setPositions(positions);
State state1 = context.getState(State::Forces);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
context.reinitialize();
context.setPositions(positions);
State state2 = context.getState(State::Forces);
positions[1][0]+= boxSize;
nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
context.reinitialize();
context.setPositions(positions);
State state3 = context.getState(State::Forces);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
context.reinitialize();
context.setPositions(positions);
State state4 = context.getState(State::Forces);
// All forces should be identical, exception state3 which should be zero.
ASSERT_EQUAL_VEC(state1.getForces()[0], state2.getForces()[0], 0.01);
ASSERT_EQUAL_VEC(state1.getForces()[1], state2.getForces()[1], 0.01);
ASSERT_EQUAL_VEC(state1.getForces()[0], state4.getForces()[0], 0.01);
ASSERT_EQUAL_VEC(state1.getForces()[1], state4.getForces()[1], 0.01);
ASSERT_EQUAL_VEC(state3.getForces()[0], Vec3(0, 0, 0), 0.01);
ASSERT_EQUAL_VEC(state3.getForces()[1], Vec3(0, 0, 0), 0.01);
}
void testForce(int numParticles, NonbondedForce::NonbondedMethod method, GBSAOBCForce::NonbondedMethod method2) {
OpenCLPlatform cl;
ReferencePlatform reference;
System system;
LangevinIntegrator integrator(0, 0.1, 0.01);
GBSAOBCForce* gbsa = new GBSAOBCForce();
NonbondedForce* nonbonded = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(1.0);
double charge = i%2 == 0 ? -1 : 1;
gbsa->addParticle(charge, 0.15, 1);
nonbonded->addParticle(charge, 1, 0);
}
nonbonded->setNonbondedMethod(method);
gbsa->setNonbondedMethod(method2);
nonbonded->setCutoffDistance(3.0);
gbsa->setCutoffDistance(3.0);
int grid = (int) floor(0.5+pow(numParticles, 1.0/3.0));
if (method == NonbondedForce::CutoffPeriodic) {
double boxSize = (grid+1)*1.1;
system.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
}
system.addForce(gbsa);
system.addForce(nonbonded);
Context context(system, integrator, cl);
Context refContext(system, integrator, reference);
// Set random (but uniformly distributed) positions for all the particles.
vector<Vec3> positions(numParticles);
init_gen_rand(0);
for (int i = 0; i < grid; i++)
for (int j = 0; j < grid; j++)
for (int k = 0; k < grid; k++)
positions[i*grid*grid+j*grid+k] = Vec3(i*1.1, j*1.1, k*1.1);
for (int i = 0; i < numParticles; ++i)
positions[i] = positions[i] + Vec3(0.5*genrand_real2(), 0.5*genrand_real2(), 0.5*genrand_real2());
context.setPositions(positions);
refContext.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
State refState = refContext.getState(State::Forces | State::Energy);
// Make sure the OpenCL and Reference platforms agree.
double norm = 0.0;
double diff = 0.0;
for (int i = 0; i < numParticles; ++i) {
Vec3 f = state.getForces()[i];
norm += f[0]*f[0] + f[1]*f[1] + f[2]*f[2];
Vec3 delta = f-refState.getForces()[i];
diff += delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
}
norm = std::sqrt(norm);
diff = std::sqrt(diff);
ASSERT_EQUAL_TOL(0.0, diff, 0.001*norm);
ASSERT_EQUAL_TOL(state.getPotentialEnergy(), refState.getPotentialEnergy(), 1e-3);
// Take a small step in the direction of the energy gradient. (This doesn't work with cutoffs, since the energy
// changes discontinuously at the cutoff distance.)
if (method == NonbondedForce::NoCutoff)
{
const double delta = 1e-2;
double step = delta/norm;
for (int i = 0; i < numParticles; ++i) {
Vec3 p = positions[i];
Vec3 f = state.getForces()[i];
positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
}
context.setPositions(positions);
// See whether the potential energy changed by the expected amount.
State state2 = context.getState(State::Energy);
ASSERT_EQUAL_TOL(norm, (state2.getPotentialEnergy()-state.getPotentialEnergy())/delta, 1e-3*abs(state.getPotentialEnergy()));
}
}
int main() {
try {
testSingleParticle();
testCutoffAndPeriodic();
// for (int i = 5; i < 11; i++) {
// testForce(i*i*i, NonbondedForce::NoCutoff, GBSAOBCForce::NoCutoff);
// testForce(i*i*i, NonbondedForce::CutoffNonPeriodic, GBSAOBCForce::CutoffNonPeriodic);
// testForce(i*i*i, NonbondedForce::CutoffPeriodic, GBSAOBCForce::CutoffPeriodic);
// }
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment