Commit 78c26f71 authored by Peter Eastman's avatar Peter Eastman
Browse files

Completed CUDA implementation of CustomNonbondedForce

parent 47d2fa76
......@@ -279,6 +279,8 @@ struct cudaGmxSimulation {
unsigned int localForces_threads_per_block; // Threads per block in local forces kernel calls
unsigned int random_threads_per_block; // Threads per block in RNG kernel calls
unsigned int interaction_threads_per_block; // Threads per block when identifying interacting tiles
unsigned int custom_exception_threads_per_block; // Threads per block in custom nonbonded exception kernel calls
unsigned int customExpressionStackSize; // Stack size for evaluating custom nonbonded forces
unsigned int workUnits; // Number of work units
unsigned int* pWorkUnit; // Pointer to work units
unsigned int* pInteractingWorkUnit; // Pointer to work units that have interactions
......
......@@ -132,12 +132,15 @@ static const float BOLTZ = (RGAS / KILO); // (k
#define DUMP_PARAMETERS 0
template <int SIZE>
static Expression<SIZE> createExpression(const string& expression, const Lepton::ExpressionProgram& program, const vector<string>& variables, const vector<string>& globalParamNames) {
static Expression<SIZE> createExpression(const string& expression, const Lepton::ExpressionProgram& program, const vector<string>& variables,
const vector<string>& globalParamNames, unsigned int& maxStackSize) {
Expression<SIZE> exp;
if (program.getNumOperations() > SIZE)
throw OpenMMException("Expression contains too many operations: "+expression);
exp.length = program.getNumOperations();
exp.stackSize = program.getStackSize();
if (exp.stackSize > maxStackSize)
maxStackSize = exp.stackSize;
for (int i = 0; i < program.getNumOperations(); i++) {
const Operation& op = program.getOperation(i);
switch (op.getId()) {
......@@ -580,6 +583,11 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double>
gpu->sim.customNonbondedMethod = method;
gpu->sim.customExceptions = exceptionAtom1.size();
gpu->sim.customParameters = paramNames.size();
gpu->sim.custom_exception_threads_per_block = (gpu->sim.customExceptions+gpu->sim.blocks-1)/gpu->sim.blocks;
if (gpu->sim.custom_exception_threads_per_block < 1)
gpu->sim.custom_exception_threads_per_block = 1;
if (gpu->sim.custom_exception_threads_per_block > gpu->sim.max_localForces_threads_per_block)
gpu->sim.custom_exception_threads_per_block = gpu->sim.max_localForces_threads_per_block;
setExclusions(gpu, exclusions);
gpu->psCustomParams = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "CustomParams");
gpu->sim.pCustomParams = gpu->psCustomParams->_pDevData;
......@@ -621,8 +629,9 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double>
variables.push_back("r");
for (int i = 0; i < paramNames.size(); i++)
variables.push_back(paramNames[i]);
SetCustomNonbondedEnergyExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames));
SetCustomNonbondedForceExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(), variables, globalParamNames));
gpu->sim.customExpressionStackSize = 0;
SetCustomNonbondedEnergyExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
SetCustomNonbondedForceExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
Expression<64> paramExpressions[4];
vector<string> combiningRuleParams;
combiningRuleParams.push_back("");
......@@ -636,7 +645,7 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double>
combiningRuleParams.push_back("");
}
for (int i = 0; i < paramNames.size(); i++)
paramExpressions[i] = createExpression<64>(combiningRules[i], Lepton::Parser::parse(combiningRules[i]).optimize().createProgram(), combiningRuleParams, globalParamNames);
paramExpressions[i] = createExpression<64>(combiningRules[i], Lepton::Parser::parse(combiningRules[i]).optimize().createProgram(), combiningRuleParams, globalParamNames, gpu->sim.customExpressionStackSize);
SetCustomNonbondedCombiningRules(paramExpressions);
}
......
......@@ -277,16 +277,24 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
{
// printf("kCalculateCustomNonbondedCutoffForces\n");
CUDPPResult result;
int sharedPerThread = sizeof(Atom)+gpu->sim.customExpressionStackSize*sizeof(float);
if (gpu->sim.customNonbondedMethod != NO_CUTOFF)
sharedPerThread += sizeof(float3);
int threads = gpu->sim.nonbond_threads_per_block;
int maxThreads = 16380/sharedPerThread;
if (threads > maxThreads)
threads = (maxThreads/32)*32;
switch (gpu->sim.customNonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
kCalculateCustomNonbondedN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pWorkUnit);
else
kCalculateCustomNonbondedN2Forces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
kCalculateCustomNonbondedN2Forces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedN2Forces");
kCalculateCustomNonbondedN2Exceptions_kernel<<<gpu->sim.blocks, gpu->sim.custom_exception_threads_per_block,
gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
LAUNCHERROR("kCalculateCustomNonbondedN2Exceptions");
break;
case CUTOFF:
if (!neighborListValid)
......@@ -306,12 +314,13 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedCutoffByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateCustomNonbondedCutoffByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCustomNonbondedCutoffForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateCustomNonbondedCutoffForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedCutoffForces");
kCalculateCustomNonbondedCutoffExceptions_kernel<<<gpu->sim.blocks, gpu->sim.custom_exception_threads_per_block,
gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
LAUNCHERROR("kCalculateCustomNonbondedCutoffExceptions");
break;
case PERIODIC:
if (!neighborListValid)
......@@ -331,12 +340,13 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedPeriodicByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateCustomNonbondedPeriodicByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCustomNonbondedPeriodicForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateCustomNonbondedPeriodicForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedPeriodicForces");
kCalculateCustomNonbondedPeriodicExceptions_kernel<<<gpu->sim.blocks, gpu->sim.custom_exception_threads_per_block,
gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
LAUNCHERROR("kCalculateCustomNonbondedPeriodicExceptions");
break;
}
}
......@@ -25,7 +25,7 @@
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for evalauating nonbonded forces. It is included
* This file contains the kernels for evalauating custom nonbonded forces. It is included
* several times in kCalculateCustomNonbondedForces.cu with different #defines to generate
* different versions of the kernels.
*/
......@@ -33,7 +33,7 @@
__global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned int* workUnit)
{
extern __shared__ float stack[];
Atom* sA = (Atom*) &stack[MAX_STACK_SIZE*blockDim.x];
Atom* sA = (Atom*) &stack[cSim.customExpressionStackSize*blockDim.x];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
......@@ -82,7 +82,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[j].params);
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params);
switch (k)
{
case 0:
......@@ -112,8 +112,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
......@@ -186,7 +186,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[tj].params);
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params);
switch (k)
{
case 0:
......@@ -216,8 +216,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......@@ -252,7 +252,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[j].params);
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params);
switch (k)
{
case 0:
......@@ -282,8 +282,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......@@ -354,7 +354,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[tj].params);
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params);
switch (k)
{
case 0:
......@@ -384,8 +384,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
......@@ -445,3 +445,56 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
}
cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += totalEnergy;
}
__global__ void METHOD_NAME(kCalculateCustomNonbonded, Exceptions_kernel)()
{
extern __shared__ float stack[];
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
float totalEnergy = 0.0f;
while (pos < cSim.customExceptions)
{
int4 atom = cSim.pCustomExceptionID[pos];
float4 params = cSim.pCustomExceptionParams[pos];
float4 a1 = cSim.pPosq[atom.x];
float4 a2 = cSim.pPosq[atom.y];
float dx = a1.x - a2.x;
float dy = a1.y - a2.y;
float dz = a1.z - a2.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, params, params)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, params, params);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
dEdR = 0.0f;
energy = 0.0f;
}
#endif
totalEnergy += energy;
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
unsigned int offsetA = atom.x + atom.z * cSim.stride;
unsigned int offsetB = atom.y + atom.w * cSim.stride;
float4 forceA = cSim.pForce4[offsetA];
float4 forceB = cSim.pForce4[offsetB];
forceA.x += dx;
forceA.y += dy;
forceA.z += dz;
forceB.x -= dx;
forceB.y -= dy;
forceB.z -= dz;
cSim.pForce4[offsetA] = forceA;
cSim.pForce4[offsetB] = forceB;
pos += blockDim.x * gridDim.x;
}
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy;
}
......@@ -206,7 +206,7 @@ int main() {
try {
testSimpleExpression();
testParameters();
// testExceptions();
testExceptions();
testCutoff();
testPeriodic();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment