Commit 5a89d510 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement multi-GPU support

parent 648d38d9
...@@ -40,6 +40,22 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo ...@@ -40,6 +40,22 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLParallelCalcForcesAndEnergyKernel(name, platform, data); return new OpenCLParallelCalcForcesAndEnergyKernel(name, platform, data);
if (name == CalcHarmonicBondForceKernel::Name()) if (name == CalcHarmonicBondForceKernel::Name())
return new OpenCLParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem()); return new OpenCLParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomBondForceKernel::Name())
return new OpenCLParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
if (name == CalcHarmonicAngleForceKernel::Name())
return new OpenCLParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomAngleForceKernel::Name())
return new OpenCLParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
if (name == CalcPeriodicTorsionForceKernel::Name())
return new OpenCLParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcRBTorsionForceKernel::Name())
return new OpenCLParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcCMAPTorsionForceKernel::Name())
return new OpenCLParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomTorsionForceKernel::Name())
return new OpenCLParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcNonbondedForceKernel::Name())
return new OpenCLParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
} }
OpenCLContext& cl = *data.contexts[0]; OpenCLContext& cl = *data.contexts[0];
if (name == CalcForcesAndEnergyKernel::Name()) if (name == CalcForcesAndEnergyKernel::Name())
......
...@@ -318,7 +318,10 @@ OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() { ...@@ -318,7 +318,10 @@ OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
} }
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) { void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
numBonds = force.getNumBonds(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0) if (numBonds == 0)
return; return;
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customBondParams"); params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customBondParams");
...@@ -334,7 +337,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus ...@@ -334,7 +337,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
for (int i = 0; i < numBonds; i++) { for (int i = 0; i < numBonds; i++) {
int particle1, particle2; int particle1, particle2;
vector<double> parameters; vector<double> parameters;
force.getBondParameters(i, particle1, particle2, parameters); force.getBondParameters(startIndex+i, particle1, particle2, parameters);
paramVector[i].resize(parameters.size()); paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++) for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j]; paramVector[i][j] = (cl_float) parameters[j];
...@@ -460,7 +463,10 @@ OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() { ...@@ -460,7 +463,10 @@ OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
} }
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) { void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
numAngles = force.getNumAngles(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0) if (numAngles == 0)
return; return;
params = new OpenCLArray<mm_float2>(cl, numAngles, "angleParams"); params = new OpenCLArray<mm_float2>(cl, numAngles, "angleParams");
...@@ -471,7 +477,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const ...@@ -471,7 +477,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
for (int i = 0; i < numAngles; i++) { for (int i = 0; i < numAngles; i++) {
int particle1, particle2, particle3; int particle1, particle2, particle3;
double angle, k; double angle, k;
force.getAngleParameters(i, particle1, particle2, particle3, angle, k); force.getAngleParameters(startIndex+i, particle1, particle2, particle3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k); paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
indicesVector[i] = mm_int8(particle1, particle2, particle3, indicesVector[i] = mm_int8(particle1, particle2, particle3,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, 0, 0); forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, 0, 0);
...@@ -544,7 +550,10 @@ OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() { ...@@ -544,7 +550,10 @@ OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
} }
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) { void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
numAngles = force.getNumAngles(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0) if (numAngles == 0)
return; return;
params = new OpenCLParameterSet(cl, force.getNumPerAngleParameters(), numAngles, "customAngleParams"); params = new OpenCLParameterSet(cl, force.getNumPerAngleParameters(), numAngles, "customAngleParams");
...@@ -560,7 +569,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu ...@@ -560,7 +569,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
for (int i = 0; i < numAngles; i++) { for (int i = 0; i < numAngles; i++) {
int particle1, particle2, particle3; int particle1, particle2, particle3;
vector<double> parameters; vector<double> parameters;
force.getAngleParameters(i, particle1, particle2, particle3, parameters); force.getAngleParameters(startIndex+i, particle1, particle2, particle3, parameters);
paramVector[i].resize(parameters.size()); paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++) for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j]; paramVector[i][j] = (cl_float) parameters[j];
...@@ -688,7 +697,10 @@ OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() { ...@@ -688,7 +697,10 @@ OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
} }
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) { void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
numTorsions = force.getNumTorsions(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0) if (numTorsions == 0)
return; return;
params = new OpenCLArray<mm_float4>(cl, numTorsions, "periodicTorsionParams"); params = new OpenCLArray<mm_float4>(cl, numTorsions, "periodicTorsionParams");
...@@ -699,7 +711,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons ...@@ -699,7 +711,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4, periodicity; int particle1, particle2, particle3, particle4, periodicity;
double phase, k; double phase, k;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, periodicity, phase, k); force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f); paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4, indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++); forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
...@@ -768,7 +780,10 @@ OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() { ...@@ -768,7 +780,10 @@ OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
} }
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) { void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
numTorsions = force.getNumTorsions(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0) if (numTorsions == 0)
return; return;
params = new OpenCLArray<mm_float8>(cl, numTorsions, "rbTorsionParams"); params = new OpenCLArray<mm_float8>(cl, numTorsions, "rbTorsionParams");
...@@ -779,7 +794,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo ...@@ -779,7 +794,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4; int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5; double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5); force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f); paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4, indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++); forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
...@@ -854,7 +869,10 @@ OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() { ...@@ -854,7 +869,10 @@ OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() {
} }
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) { void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
numTorsions = force.getNumTorsions(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0) if (numTorsions == 0)
return; return;
int numMaps = force.getNumMaps(); int numMaps = force.getNumMaps();
...@@ -881,7 +899,7 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM ...@@ -881,7 +899,7 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM
vector<mm_int16> torsionIndicesVec(numTorsions); vector<mm_int16> torsionIndicesVec(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
mm_int16& ind = torsionIndicesVec[i]; mm_int16& ind = torsionIndicesVec[i];
force.getTorsionParameters(i, torsionMapsVec[i], ind.s0, ind.s1, ind.s2, ind.s3, ind.s4, ind.s5, ind.s6, ind.s7); force.getTorsionParameters(startIndex+i, torsionMapsVec[i], ind.s0, ind.s1, ind.s2, ind.s3, ind.s4, ind.s5, ind.s6, ind.s7);
ind.s8 = forceBufferCounter[ind.s0]++; ind.s8 = forceBufferCounter[ind.s0]++;
ind.s9 = forceBufferCounter[ind.s1]++; ind.s9 = forceBufferCounter[ind.s1]++;
ind.s10 = forceBufferCounter[ind.s2]++; ind.s10 = forceBufferCounter[ind.s2]++;
...@@ -967,7 +985,10 @@ OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() { ...@@ -967,7 +985,10 @@ OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
} }
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) { void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
numTorsions = force.getNumTorsions(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0) if (numTorsions == 0)
return; return;
params = new OpenCLParameterSet(cl, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams"); params = new OpenCLParameterSet(cl, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams");
...@@ -983,7 +1004,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const ...@@ -983,7 +1004,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4; int particle1, particle2, particle3, particle4;
vector<double> parameters; vector<double> parameters;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, parameters); force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, parameters);
paramVector[i].resize(parameters.size()); paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++) for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j]; paramVector[i][j] = (cl_float) parameters[j];
...@@ -1197,7 +1218,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1197,7 +1218,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK); defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC); defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC);
} }
if (force.getUseDispersionCorrection()) if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force); dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
...@@ -1210,7 +1231,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1210,7 +1231,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = doubleToString(alpha); defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI); ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI) : 0.0);
// Create the reciprocal space kernels. // Create the reciprocal space kernels.
...@@ -1236,7 +1257,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1236,7 +1257,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = doubleToString(alpha); defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI); ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI) : 0.0);
pmeDefines["PME_ORDER"] = intToString(PmeOrder); pmeDefines["PME_ORDER"] = intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = intToString(numParticles); pmeDefines["NUM_ATOMS"] = intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = doubleToString(M_PI*M_PI/(alpha*alpha)); pmeDefines["RECIP_EXP_FACTOR"] = doubleToString(M_PI*M_PI/(alpha*alpha));
...@@ -1343,7 +1364,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1343,7 +1364,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize the exceptions. // Initialize the exceptions.
int numExceptions = exceptions.size(); int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
int maxBuffers = cl.getNonbondedUtilities().getNumForceBuffers(); int maxBuffers = cl.getNonbondedUtilities().getNumForceBuffers();
if (numExceptions > 0) { if (numExceptions > 0) {
exceptionParams = new OpenCLArray<mm_float4>(cl, numExceptions, "exceptionParams"); exceptionParams = new OpenCLArray<mm_float4>(cl, numExceptions, "exceptionParams");
...@@ -1354,7 +1378,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1354,7 +1378,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
int particle1, particle2; int particle1, particle2;
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[i], particle1, particle2, chargeProd, sigma, epsilon); force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f); exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionIndicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++); exceptionIndicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
} }
...@@ -1425,7 +1449,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1425,7 +1449,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
} }
if (exceptionIndices != NULL) if (exceptionIndices != NULL)
cl.executeKernel(exceptionsKernel, exceptionIndices->getSize()); cl.executeKernel(exceptionsKernel, exceptionIndices->getSize());
if (cosSinSums != NULL) { if (cosSinSums != NULL && cl.getContextIndex() == 0) {
mm_float4 boxSize = cl.getPeriodicBoxSize(); mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 recipBoxSize = mm_float4((float) (2*M_PI/boxSize.x), (float) (2*M_PI/boxSize.y), (float) (2*M_PI/boxSize.z), 0); mm_float4 recipBoxSize = mm_float4((float) (2*M_PI/boxSize.x), (float) (2*M_PI/boxSize.y), (float) (2*M_PI/boxSize.z), 0);
float recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z); float recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
...@@ -1436,7 +1460,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1436,7 +1460,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldForcesKernel.setArg<cl_float>(4, recipCoefficient); ewaldForcesKernel.setArg<cl_float>(4, recipCoefficient);
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms()); cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
} }
if (pmeGrid != NULL) { if (pmeGrid != NULL && cl.getContextIndex() == 0) {
mm_float4 boxSize = cl.getPeriodicBoxSize(); mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 invBoxSize = cl.getInvPeriodicBoxSize(); mm_float4 invBoxSize = cl.getInvPeriodicBoxSize();
pmeUpdateBsplinesKernel.setArg<mm_float4>(5, boxSize); pmeUpdateBsplinesKernel.setArg<mm_float4>(5, boxSize);
......
...@@ -126,7 +126,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -126,7 +126,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create the list of tiles. // Create the list of tiles.
int numAtomBlocks = context.getNumAtomBlocks(); int numAtomBlocks = context.getNumAtomBlocks();
int numTiles = numAtomBlocks*(numAtomBlocks+1)/2; int totalTiles = numAtomBlocks*(numAtomBlocks+1)/2;
int numContexts = context.getPlatformData().contexts.size();
startTileIndex = context.getContextIndex()*totalTiles/numContexts;
int endTileIndex = (context.getContextIndex()+1)*totalTiles/numContexts;
numTiles = endTileIndex-startTileIndex;
// Build a list of indices for the tiles with exclusions. // Build a list of indices for the tiles with exclusions.
...@@ -210,10 +214,10 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -210,10 +214,10 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
mm_float4 boxSize = context.getPeriodicBoxSize(); mm_float4 boxSize = context.getPeriodicBoxSize();
int maxInteractingTiles = (int) (numTiles*(cutoff/boxSize.x+cutoff/boxSize.y+cutoff/boxSize.z)); int maxInteractingTiles = (int) (numTiles*(cutoff/boxSize.x+cutoff/boxSize.y+cutoff/boxSize.z));
if (maxInteractingTiles < 1)
maxInteractingTiles = 1;
if (maxInteractingTiles > numTiles) if (maxInteractingTiles > numTiles)
maxInteractingTiles = numTiles; maxInteractingTiles = numTiles;
if (maxInteractingTiles < 1)
maxInteractingTiles = 1;
interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles"); interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles");
interactionFlags = new OpenCLArray<cl_uint>(context, context.getSIMDWidth() == 32 ? maxInteractingTiles : (deviceIsCpu ? 2*maxInteractingTiles : 1), "interactionFlags"); interactionFlags = new OpenCLArray<cl_uint>(context, context.getSIMDWidth() == 32 ? maxInteractingTiles : (deviceIsCpu ? 2*maxInteractingTiles : 1), "interactionFlags");
interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount", true); interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount", true);
...@@ -229,6 +233,8 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -229,6 +233,8 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if (useCutoff) { if (useCutoff) {
map<string, string> defines; map<string, string> defines;
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
defines["START_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex);
defines["END_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex+numTiles);
if (forceBufferPerAtomBlock) if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (usePeriodic) if (usePeriodic)
...@@ -441,6 +447,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -441,6 +447,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms()); defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
defines["START_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex);
defines["END_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex+numTiles);
string file; string file;
if (deviceIsCpu) if (deviceIsCpu)
file = OpenCLKernelSources::nonbonded_cpu; file = OpenCLKernelSources::nonbonded_cpu;
...@@ -470,7 +478,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -470,7 +478,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel.setArg<cl::Buffer>(index++, interactionFlags->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactionFlags->getDeviceBuffer());
} }
else { else {
kernel.setArg<cl_uint>(index++, context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2); kernel.setArg<cl_uint>(index++, numTiles);
} }
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
kernel.setArg<cl::Memory>(index++, params[i].getMemory()); kernel.setArg<cl::Memory>(index++, params[i].getMemory());
......
...@@ -178,6 +178,18 @@ public: ...@@ -178,6 +178,18 @@ public:
OpenCLArray<cl_uint>& getExclusionRowIndices() { OpenCLArray<cl_uint>& getExclusionRowIndices() {
return *exclusionRowIndices; return *exclusionRowIndices;
} }
/**
* Get the index of the first tile this context is responsible for processing.
*/
int getStartTileIndex() const {
return startTileIndex;
}
/**
* Get the total number of tiles this context is responsible for processing.
*/
int getNumTiles() const {
return numTiles;
}
/** /**
* Create a Kernel for evaluating a nonbonded interaction. Cutoffs and periodic boundary conditions * Create a Kernel for evaluating a nonbonded interaction. Cutoffs and periodic boundary conditions
* are assumed to be the same as those for the default interaction Kernel, since this kernel will use * are assumed to be the same as those for the default interaction Kernel, since this kernel will use
...@@ -212,7 +224,7 @@ private: ...@@ -212,7 +224,7 @@ private:
std::map<std::string, std::string> kernelDefines; std::map<std::string, std::string> kernelDefines;
double cutoff; double cutoff;
bool useCutoff, usePeriodic, forceBufferPerAtomBlock, deviceIsCpu; bool useCutoff, usePeriodic, forceBufferPerAtomBlock, deviceIsCpu;
int numForceBuffers; int numForceBuffers, startTileIndex, numTiles;
}; };
/** /**
......
...@@ -91,6 +91,152 @@ void OpenCLParallelCalcHarmonicBondForceKernel::initialize(const System& system, ...@@ -91,6 +91,152 @@ void OpenCLParallelCalcHarmonicBondForceKernel::initialize(const System& system,
} }
double OpenCLParallelCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLParallelCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCustomBondForceKernel::OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCustomBondForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcHarmonicAngleForceKernel::OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcHarmonicAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcHarmonicAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCustomAngleForceKernel::OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCustomAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcPeriodicTorsionForceKernel::OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcPeriodicTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcPeriodicTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++) for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).execute(context, includeForces, includeEnergy); energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcRBTorsionForceKernel::OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcRBTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcRBTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCMAPTorsionForceKernel::OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCMAPTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCMAPTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCustomTorsionForceKernel::OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCustomTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcNonbondedForceKernel::OpenCLParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcNonbondedForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcNonbondedForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
} }
...@@ -111,10 +111,10 @@ private: ...@@ -111,10 +111,10 @@ private:
*/ */
class OpenCLParallelCalcCustomBondForceKernel : public CalcCustomBondForceKernel { class OpenCLParallelCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public: public:
OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCustomBondForceKernel(name, platform), OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcCustomBondForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomBondForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcCustomBondForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -133,6 +133,7 @@ public: ...@@ -133,6 +133,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
...@@ -140,10 +141,10 @@ private: ...@@ -140,10 +141,10 @@ private:
*/ */
class OpenCLParallelCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel { class OpenCLParallelCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public: public:
OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcHarmonicAngleForceKernel(name, platform), OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcHarmonicAngleForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcHarmonicAngleForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcHarmonicAngleForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -162,6 +163,7 @@ public: ...@@ -162,6 +163,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
...@@ -169,10 +171,10 @@ private: ...@@ -169,10 +171,10 @@ private:
*/ */
class OpenCLParallelCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel { class OpenCLParallelCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public: public:
OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCustomAngleForceKernel(name, platform), OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcCustomAngleForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomAngleForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcCustomAngleForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -191,6 +193,7 @@ public: ...@@ -191,6 +193,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
...@@ -198,10 +201,10 @@ private: ...@@ -198,10 +201,10 @@ private:
*/ */
class OpenCLParallelCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel { class OpenCLParallelCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public: public:
OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcPeriodicTorsionForceKernel(name, platform), OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcPeriodicTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcPeriodicTorsionForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcPeriodicTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -220,6 +223,7 @@ public: ...@@ -220,6 +223,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
...@@ -227,10 +231,10 @@ private: ...@@ -227,10 +231,10 @@ private:
*/ */
class OpenCLParallelCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel { class OpenCLParallelCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public: public:
OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcRBTorsionForceKernel(name, platform), OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcRBTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcRBTorsionForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcRBTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -249,6 +253,7 @@ public: ...@@ -249,6 +253,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
...@@ -256,10 +261,10 @@ private: ...@@ -256,10 +261,10 @@ private:
*/ */
class OpenCLParallelCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel { class OpenCLParallelCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public: public:
OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCMAPTorsionForceKernel(name, platform), OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcCMAPTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCMAPTorsionForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcCMAPTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -278,6 +283,7 @@ public: ...@@ -278,6 +283,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
...@@ -285,10 +291,10 @@ private: ...@@ -285,10 +291,10 @@ private:
*/ */
class OpenCLParallelCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel { class OpenCLParallelCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public: public:
OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCustomTorsionForceKernel(name, platform), OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcCustomTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomTorsionForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcCustomTorsionForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -307,6 +313,7 @@ public: ...@@ -307,6 +313,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
...@@ -314,10 +321,10 @@ private: ...@@ -314,10 +321,10 @@ private:
*/ */
class OpenCLParallelCalcNonbondedForceKernel : public CalcNonbondedForceKernel { class OpenCLParallelCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
OpenCLParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcNonbondedForceKernel(name, platform), OpenCLParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
data(data) { OpenCLCalcNonbondedForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcNonbondedForceKernel&>(kernels[index].getImpl());
} }
~OpenCLParallelCalcNonbondedForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -336,6 +343,7 @@ public: ...@@ -336,6 +343,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
}; };
/** /**
......
...@@ -172,12 +172,11 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -172,12 +172,11 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
for (int i = 0; i < BUFFER_GROUPS; ++i) for (int i = 0; i < BUFFER_GROUPS; ++i)
valid[i*GROUP_SIZE+get_local_id(0)] = false; valid[i*GROUP_SIZE+get_local_id(0)] = false;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
const int numTiles = (NUM_BLOCKS*(NUM_BLOCKS+1))/2; for (int baseIndex = START_TILE_INDEX+get_group_id(0)*get_local_size(0); baseIndex < END_TILE_INDEX; baseIndex += get_global_size(0)) {
for (int baseIndex = get_group_id(0)*get_local_size(0); baseIndex < numTiles; baseIndex += get_global_size(0)) {
// Identify the pair of blocks to compare. // Identify the pair of blocks to compare.
int index = baseIndex+get_local_id(0); int index = baseIndex+get_local_id(0);
if (index < numTiles) { if (index < END_TILE_INDEX) {
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*index)); unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*index));
unsigned int x = (index-y*NUM_BLOCKS+y*(y+1)/2); unsigned int x = (index-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
......
...@@ -126,9 +126,9 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -126,9 +126,9 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles) { __global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles) {
ushort2 buffer[BUFFER_SIZE]; ushort2 buffer[BUFFER_SIZE];
int valuesInBuffer = 0; int valuesInBuffer = 0;
const int numTiles = (NUM_BLOCKS*(NUM_BLOCKS+1))/2; const int numTiles = END_TILE_INDEX-START_TILE_INDEX;
unsigned int start = get_group_id(0)*numTiles/get_num_groups(0); unsigned int start = START_TILE_INDEX+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0); unsigned int end = START_TILE_INDEX+(get_group_id(0)+1)*numTiles/get_num_groups(0);
for (int index = start; index < end; index++) { for (int index = start; index < end; index++) {
// Identify the pair of blocks to compare. // Identify the pair of blocks to compare.
......
...@@ -21,11 +21,11 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en ...@@ -21,11 +21,11 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int pos = (numTiles > maxTiles ? START_TILE_INDEX+get_group_id(0)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : get_group_id(0)*numTiles/get_num_groups(0));
unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int end = (numTiles > maxTiles ? START_TILE_INDEX+(get_group_id(0)+1)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : (get_group_id(0)+1)*numTiles/get_num_groups(0));
#else #else
unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0); unsigned int pos = START_TILE_INDEX+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0); unsigned int end = START_TILE_INDEX+(get_group_id(0)+1)*numTiles/get_num_groups(0);
#endif #endif
float energy = 0.0f; float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
......
...@@ -22,11 +22,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -22,11 +22,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int pos = (numTiles > maxTiles ? START_TILE_INDEX+get_group_id(0)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : get_group_id(0)*numTiles/get_num_groups(0));
unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int end = (numTiles > maxTiles ? START_TILE_INDEX+(get_group_id(0)+1)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : (get_group_id(0)+1)*numTiles/get_num_groups(0));
#else #else
unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0); unsigned int pos = START_TILE_INDEX+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0); unsigned int end = START_TILE_INDEX+(get_group_id(0)+1)*numTiles/get_num_groups(0);
#endif #endif
float energy = 0.0f; float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
......
...@@ -24,11 +24,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -24,11 +24,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
unsigned int warp = get_global_id(0)/TILE_SIZE; unsigned int warp = get_global_id(0)/TILE_SIZE;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int pos = (numTiles > maxTiles ? START_TILE_INDEX+warp*(END_TILE_INDEX-START_TILE_INDEX)/totalWarps : warp*numTiles/totalWarps);
unsigned int end = (warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int end = (numTiles > maxTiles ? START_TILE_INDEX+(warp+1)*(END_TILE_INDEX-START_TILE_INDEX)/totalWarps : (warp+1)*numTiles/totalWarps);
#else #else
unsigned int pos = warp*numTiles/totalWarps; unsigned int pos = START_TILE_INDEX+warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps; unsigned int end = START_TILE_INDEX+(warp+1)*numTiles/totalWarps;
#endif #endif
float energy = 0.0f; float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment