Commit 5a89d510 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement multi-GPU support

parent 648d38d9
......@@ -40,6 +40,22 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLParallelCalcForcesAndEnergyKernel(name, platform, data);
if (name == CalcHarmonicBondForceKernel::Name())
return new OpenCLParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomBondForceKernel::Name())
return new OpenCLParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
if (name == CalcHarmonicAngleForceKernel::Name())
return new OpenCLParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomAngleForceKernel::Name())
return new OpenCLParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
if (name == CalcPeriodicTorsionForceKernel::Name())
return new OpenCLParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcRBTorsionForceKernel::Name())
return new OpenCLParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcCMAPTorsionForceKernel::Name())
return new OpenCLParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcCustomTorsionForceKernel::Name())
return new OpenCLParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
if (name == CalcNonbondedForceKernel::Name())
return new OpenCLParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
}
OpenCLContext& cl = *data.contexts[0];
if (name == CalcForcesAndEnergyKernel::Name())
......
......@@ -318,7 +318,10 @@ OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
}
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
numBonds = force.getNumBonds();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customBondParams");
......@@ -334,7 +337,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
for (int i = 0; i < numBonds; i++) {
int particle1, particle2;
vector<double> parameters;
force.getBondParameters(i, particle1, particle2, parameters);
force.getBondParameters(startIndex+i, particle1, particle2, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
......@@ -460,7 +463,10 @@ OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
}
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
numAngles = force.getNumAngles();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
params = new OpenCLArray<mm_float2>(cl, numAngles, "angleParams");
......@@ -471,7 +477,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
for (int i = 0; i < numAngles; i++) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(i, particle1, particle2, particle3, angle, k);
force.getAngleParameters(startIndex+i, particle1, particle2, particle3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
indicesVector[i] = mm_int8(particle1, particle2, particle3,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, 0, 0);
......@@ -544,7 +550,10 @@ OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
}
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
numAngles = force.getNumAngles();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerAngleParameters(), numAngles, "customAngleParams");
......@@ -560,7 +569,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
for (int i = 0; i < numAngles; i++) {
int particle1, particle2, particle3;
vector<double> parameters;
force.getAngleParameters(i, particle1, particle2, particle3, parameters);
force.getAngleParameters(startIndex+i, particle1, particle2, particle3, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
......@@ -688,7 +697,10 @@ OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
}
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
numTorsions = force.getNumTorsions();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
params = new OpenCLArray<mm_float4>(cl, numTorsions, "periodicTorsionParams");
......@@ -699,7 +711,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, periodicity, phase, k);
force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
......@@ -768,7 +780,10 @@ OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
}
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
numTorsions = force.getNumTorsions();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
params = new OpenCLArray<mm_float8>(cl, numTorsions, "rbTorsionParams");
......@@ -779,7 +794,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
......@@ -854,7 +869,10 @@ OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() {
}
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
numTorsions = force.getNumTorsions();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
int numMaps = force.getNumMaps();
......@@ -881,7 +899,7 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM
vector<mm_int16> torsionIndicesVec(numTorsions);
for (int i = 0; i < numTorsions; i++) {
mm_int16& ind = torsionIndicesVec[i];
force.getTorsionParameters(i, torsionMapsVec[i], ind.s0, ind.s1, ind.s2, ind.s3, ind.s4, ind.s5, ind.s6, ind.s7);
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], ind.s0, ind.s1, ind.s2, ind.s3, ind.s4, ind.s5, ind.s6, ind.s7);
ind.s8 = forceBufferCounter[ind.s0]++;
ind.s9 = forceBufferCounter[ind.s1]++;
ind.s10 = forceBufferCounter[ind.s2]++;
......@@ -967,7 +985,10 @@ OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
}
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
numTorsions = force.getNumTorsions();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams");
......@@ -983,7 +1004,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4;
vector<double> parameters;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, parameters);
force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
......@@ -1197,7 +1218,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC);
}
if (force.getUseDispersionCorrection())
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
......@@ -1210,7 +1231,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI);
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI) : 0.0);
// Create the reciprocal space kernels.
......@@ -1236,7 +1257,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI);
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI) : 0.0);
pmeDefines["PME_ORDER"] = intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = doubleToString(M_PI*M_PI/(alpha*alpha));
......@@ -1343,7 +1364,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize the exceptions.
int numExceptions = exceptions.size();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
int maxBuffers = cl.getNonbondedUtilities().getNumForceBuffers();
if (numExceptions > 0) {
exceptionParams = new OpenCLArray<mm_float4>(cl, numExceptions, "exceptionParams");
......@@ -1354,7 +1378,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[i], particle1, particle2, chargeProd, sigma, epsilon);
force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionIndicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
}
......@@ -1425,7 +1449,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
if (exceptionIndices != NULL)
cl.executeKernel(exceptionsKernel, exceptionIndices->getSize());
if (cosSinSums != NULL) {
if (cosSinSums != NULL && cl.getContextIndex() == 0) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 recipBoxSize = mm_float4((float) (2*M_PI/boxSize.x), (float) (2*M_PI/boxSize.y), (float) (2*M_PI/boxSize.z), 0);
float recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
......@@ -1436,7 +1460,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldForcesKernel.setArg<cl_float>(4, recipCoefficient);
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
if (pmeGrid != NULL) {
if (pmeGrid != NULL && cl.getContextIndex() == 0) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 invBoxSize = cl.getInvPeriodicBoxSize();
pmeUpdateBsplinesKernel.setArg<mm_float4>(5, boxSize);
......
......@@ -126,7 +126,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create the list of tiles.
int numAtomBlocks = context.getNumAtomBlocks();
int numTiles = numAtomBlocks*(numAtomBlocks+1)/2;
int totalTiles = numAtomBlocks*(numAtomBlocks+1)/2;
int numContexts = context.getPlatformData().contexts.size();
startTileIndex = context.getContextIndex()*totalTiles/numContexts;
int endTileIndex = (context.getContextIndex()+1)*totalTiles/numContexts;
numTiles = endTileIndex-startTileIndex;
// Build a list of indices for the tiles with exclusions.
......@@ -210,10 +214,10 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
mm_float4 boxSize = context.getPeriodicBoxSize();
int maxInteractingTiles = (int) (numTiles*(cutoff/boxSize.x+cutoff/boxSize.y+cutoff/boxSize.z));
if (maxInteractingTiles < 1)
maxInteractingTiles = 1;
if (maxInteractingTiles > numTiles)
maxInteractingTiles = numTiles;
if (maxInteractingTiles < 1)
maxInteractingTiles = 1;
interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles");
interactionFlags = new OpenCLArray<cl_uint>(context, context.getSIMDWidth() == 32 ? maxInteractingTiles : (deviceIsCpu ? 2*maxInteractingTiles : 1), "interactionFlags");
interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount", true);
......@@ -229,6 +233,8 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if (useCutoff) {
map<string, string> defines;
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
defines["START_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex);
defines["END_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex+numTiles);
if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (usePeriodic)
......@@ -441,6 +447,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
defines["START_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex);
defines["END_TILE_INDEX"] = OpenCLExpressionUtilities::intToString(startTileIndex+numTiles);
string file;
if (deviceIsCpu)
file = OpenCLKernelSources::nonbonded_cpu;
......@@ -470,7 +478,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel.setArg<cl::Buffer>(index++, interactionFlags->getDeviceBuffer());
}
else {
kernel.setArg<cl_uint>(index++, context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2);
kernel.setArg<cl_uint>(index++, numTiles);
}
for (int i = 0; i < (int) params.size(); i++) {
kernel.setArg<cl::Memory>(index++, params[i].getMemory());
......
......@@ -178,6 +178,18 @@ public:
OpenCLArray<cl_uint>& getExclusionRowIndices() {
return *exclusionRowIndices;
}
/**
* Get the index of the first tile this context is responsible for processing.
*/
int getStartTileIndex() const {
return startTileIndex;
}
/**
* Get the total number of tiles this context is responsible for processing.
*/
int getNumTiles() const {
return numTiles;
}
/**
* Create a Kernel for evaluating a nonbonded interaction. Cutoffs and periodic boundary conditions
* are assumed to be the same as those for the default interaction Kernel, since this kernel will use
......@@ -212,7 +224,7 @@ private:
std::map<std::string, std::string> kernelDefines;
double cutoff;
bool useCutoff, usePeriodic, forceBufferPerAtomBlock, deviceIsCpu;
int numForceBuffers;
int numForceBuffers, startTileIndex, numTiles;
};
/**
......
......@@ -91,6 +91,152 @@ void OpenCLParallelCalcHarmonicBondForceKernel::initialize(const System& system,
}
double OpenCLParallelCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCustomBondForceKernel::OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCustomBondForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcHarmonicAngleForceKernel::OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcHarmonicAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcHarmonicAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCustomAngleForceKernel::OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCustomAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcPeriodicTorsionForceKernel::OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcPeriodicTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcPeriodicTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).execute(context, includeForces, includeEnergy);
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcRBTorsionForceKernel::OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcRBTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcRBTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCMAPTorsionForceKernel::OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCMAPTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCMAPTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcCustomTorsionForceKernel::OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcCustomTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcCustomTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
OpenCLParallelCalcNonbondedForceKernel::OpenCLParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) :
CalcNonbondedForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new OpenCLCalcNonbondedForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
for (int i = 0; i < (int) kernels.size(); i++)
getKernel(i).initialize(system, force);
}
double OpenCLParallelCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double energy = 0.0;
for (int i = 0; i < (int) kernels.size(); i++)
energy += getKernel(i).execute(context, includeForces, includeEnergy);
return energy;
}
......@@ -111,10 +111,10 @@ private:
*/
class OpenCLParallelCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public:
OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCustomBondForceKernel(name, platform),
data(data) {
OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcCustomBondForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomBondForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcCustomBondForceKernel();
/**
* Initialize the kernel.
*
......@@ -133,6 +133,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......@@ -140,10 +141,10 @@ private:
*/
class OpenCLParallelCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public:
OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcHarmonicAngleForceKernel(name, platform),
data(data) {
OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcHarmonicAngleForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcHarmonicAngleForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcHarmonicAngleForceKernel();
/**
* Initialize the kernel.
*
......@@ -162,6 +163,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......@@ -169,10 +171,10 @@ private:
*/
class OpenCLParallelCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public:
OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCustomAngleForceKernel(name, platform),
data(data) {
OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcCustomAngleForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomAngleForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcCustomAngleForceKernel();
/**
* Initialize the kernel.
*
......@@ -191,6 +193,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......@@ -198,10 +201,10 @@ private:
*/
class OpenCLParallelCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public:
OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcPeriodicTorsionForceKernel(name, platform),
data(data) {
OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcPeriodicTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcPeriodicTorsionForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcPeriodicTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -220,6 +223,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......@@ -227,10 +231,10 @@ private:
*/
class OpenCLParallelCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public:
OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcRBTorsionForceKernel(name, platform),
data(data) {
OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcRBTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcRBTorsionForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcRBTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -249,6 +253,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......@@ -256,10 +261,10 @@ private:
*/
class OpenCLParallelCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public:
OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCMAPTorsionForceKernel(name, platform),
data(data) {
OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcCMAPTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCMAPTorsionForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcCMAPTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -278,6 +283,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......@@ -285,10 +291,10 @@ private:
*/
class OpenCLParallelCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public:
OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcCustomTorsionForceKernel(name, platform),
data(data) {
OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcCustomTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomTorsionForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcCustomTorsionForceKernel();
/**
* Initialize the kernel.
*
......@@ -307,6 +313,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......@@ -314,10 +321,10 @@ private:
*/
class OpenCLParallelCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system) : CalcNonbondedForceKernel(name, platform),
data(data) {
OpenCLParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, System& system);
OpenCLCalcNonbondedForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcNonbondedForceKernel&>(kernels[index].getImpl());
}
~OpenCLParallelCalcNonbondedForceKernel();
/**
* Initialize the kernel.
*
......@@ -336,6 +343,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
OpenCLPlatform::PlatformData& data;
std::vector<Kernel> kernels;
};
/**
......
......@@ -172,12 +172,11 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
for (int i = 0; i < BUFFER_GROUPS; ++i)
valid[i*GROUP_SIZE+get_local_id(0)] = false;
barrier(CLK_LOCAL_MEM_FENCE);
const int numTiles = (NUM_BLOCKS*(NUM_BLOCKS+1))/2;
for (int baseIndex = get_group_id(0)*get_local_size(0); baseIndex < numTiles; baseIndex += get_global_size(0)) {
for (int baseIndex = START_TILE_INDEX+get_group_id(0)*get_local_size(0); baseIndex < END_TILE_INDEX; baseIndex += get_global_size(0)) {
// Identify the pair of blocks to compare.
int index = baseIndex+get_local_id(0);
if (index < numTiles) {
if (index < END_TILE_INDEX) {
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*index));
unsigned int x = (index-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
......
......@@ -126,9 +126,9 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles) {
ushort2 buffer[BUFFER_SIZE];
int valuesInBuffer = 0;
const int numTiles = (NUM_BLOCKS*(NUM_BLOCKS+1))/2;
unsigned int start = get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
const int numTiles = END_TILE_INDEX-START_TILE_INDEX;
unsigned int start = START_TILE_INDEX+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = START_TILE_INDEX+(get_group_id(0)+1)*numTiles/get_num_groups(0);
for (int index = start; index < end; index++) {
// Identify the pair of blocks to compare.
......
......@@ -21,11 +21,11 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
unsigned int pos = (numTiles > maxTiles ? START_TILE_INDEX+get_group_id(0)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : get_group_id(0)*numTiles/get_num_groups(0));
unsigned int end = (numTiles > maxTiles ? START_TILE_INDEX+(get_group_id(0)+1)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : (get_group_id(0)+1)*numTiles/get_num_groups(0));
#else
unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
unsigned int pos = START_TILE_INDEX+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = START_TILE_INDEX+(get_group_id(0)+1)*numTiles/get_num_groups(0);
#endif
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
......
......@@ -22,11 +22,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
unsigned int pos = (numTiles > maxTiles ? START_TILE_INDEX+get_group_id(0)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : get_group_id(0)*numTiles/get_num_groups(0));
unsigned int end = (numTiles > maxTiles ? START_TILE_INDEX+(get_group_id(0)+1)*(END_TILE_INDEX-START_TILE_INDEX)/get_num_groups(0) : (get_group_id(0)+1)*numTiles/get_num_groups(0));
#else
unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
unsigned int pos = START_TILE_INDEX+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = START_TILE_INDEX+(get_group_id(0)+1)*numTiles/get_num_groups(0);
#endif
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
......
......@@ -24,11 +24,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
unsigned int warp = get_global_id(0)/TILE_SIZE;
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
unsigned int end = (warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
unsigned int pos = (numTiles > maxTiles ? START_TILE_INDEX+warp*(END_TILE_INDEX-START_TILE_INDEX)/totalWarps : warp*numTiles/totalWarps);
unsigned int end = (numTiles > maxTiles ? START_TILE_INDEX+(warp+1)*(END_TILE_INDEX-START_TILE_INDEX)/totalWarps : (warp+1)*numTiles/totalWarps);
#else
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
unsigned int pos = START_TILE_INDEX+warp*numTiles/totalWarps;
unsigned int end = START_TILE_INDEX+(warp+1)*numTiles/totalWarps;
#endif
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment