Commit 78c26f71 authored by Peter Eastman's avatar Peter Eastman
Browse files

Completed CUDA implementation of CustomNonbondedForce

parent 47d2fa76
...@@ -279,6 +279,8 @@ struct cudaGmxSimulation { ...@@ -279,6 +279,8 @@ struct cudaGmxSimulation {
unsigned int localForces_threads_per_block; // Threads per block in local forces kernel calls unsigned int localForces_threads_per_block; // Threads per block in local forces kernel calls
unsigned int random_threads_per_block; // Threads per block in RNG kernel calls unsigned int random_threads_per_block; // Threads per block in RNG kernel calls
unsigned int interaction_threads_per_block; // Threads per block when identifying interacting tiles unsigned int interaction_threads_per_block; // Threads per block when identifying interacting tiles
unsigned int custom_exception_threads_per_block; // Threads per block in custom nonbonded exception kernel calls
unsigned int customExpressionStackSize; // Stack size for evaluating custom nonbonded forces
unsigned int workUnits; // Number of work units unsigned int workUnits; // Number of work units
unsigned int* pWorkUnit; // Pointer to work units unsigned int* pWorkUnit; // Pointer to work units
unsigned int* pInteractingWorkUnit; // Pointer to work units that have interactions unsigned int* pInteractingWorkUnit; // Pointer to work units that have interactions
......
...@@ -132,12 +132,15 @@ static const float BOLTZ = (RGAS / KILO); // (k ...@@ -132,12 +132,15 @@ static const float BOLTZ = (RGAS / KILO); // (k
#define DUMP_PARAMETERS 0 #define DUMP_PARAMETERS 0
template <int SIZE> template <int SIZE>
static Expression<SIZE> createExpression(const string& expression, const Lepton::ExpressionProgram& program, const vector<string>& variables, const vector<string>& globalParamNames) { static Expression<SIZE> createExpression(const string& expression, const Lepton::ExpressionProgram& program, const vector<string>& variables,
const vector<string>& globalParamNames, unsigned int& maxStackSize) {
Expression<SIZE> exp; Expression<SIZE> exp;
if (program.getNumOperations() > SIZE) if (program.getNumOperations() > SIZE)
throw OpenMMException("Expression contains too many operations: "+expression); throw OpenMMException("Expression contains too many operations: "+expression);
exp.length = program.getNumOperations(); exp.length = program.getNumOperations();
exp.stackSize = program.getStackSize(); exp.stackSize = program.getStackSize();
if (exp.stackSize > maxStackSize)
maxStackSize = exp.stackSize;
for (int i = 0; i < program.getNumOperations(); i++) { for (int i = 0; i < program.getNumOperations(); i++) {
const Operation& op = program.getOperation(i); const Operation& op = program.getOperation(i);
switch (op.getId()) { switch (op.getId()) {
...@@ -580,6 +583,11 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double> ...@@ -580,6 +583,11 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double>
gpu->sim.customNonbondedMethod = method; gpu->sim.customNonbondedMethod = method;
gpu->sim.customExceptions = exceptionAtom1.size(); gpu->sim.customExceptions = exceptionAtom1.size();
gpu->sim.customParameters = paramNames.size(); gpu->sim.customParameters = paramNames.size();
gpu->sim.custom_exception_threads_per_block = (gpu->sim.customExceptions+gpu->sim.blocks-1)/gpu->sim.blocks;
if (gpu->sim.custom_exception_threads_per_block < 1)
gpu->sim.custom_exception_threads_per_block = 1;
if (gpu->sim.custom_exception_threads_per_block > gpu->sim.max_localForces_threads_per_block)
gpu->sim.custom_exception_threads_per_block = gpu->sim.max_localForces_threads_per_block;
setExclusions(gpu, exclusions); setExclusions(gpu, exclusions);
gpu->psCustomParams = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "CustomParams"); gpu->psCustomParams = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "CustomParams");
gpu->sim.pCustomParams = gpu->psCustomParams->_pDevData; gpu->sim.pCustomParams = gpu->psCustomParams->_pDevData;
...@@ -621,8 +629,9 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double> ...@@ -621,8 +629,9 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double>
variables.push_back("r"); variables.push_back("r");
for (int i = 0; i < paramNames.size(); i++) for (int i = 0; i < paramNames.size(); i++)
variables.push_back(paramNames[i]); variables.push_back(paramNames[i]);
SetCustomNonbondedEnergyExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames)); gpu->sim.customExpressionStackSize = 0;
SetCustomNonbondedForceExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(), variables, globalParamNames)); SetCustomNonbondedEnergyExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
SetCustomNonbondedForceExpression(createExpression<128>(energyExp, Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
Expression<64> paramExpressions[4]; Expression<64> paramExpressions[4];
vector<string> combiningRuleParams; vector<string> combiningRuleParams;
combiningRuleParams.push_back(""); combiningRuleParams.push_back("");
...@@ -636,7 +645,7 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double> ...@@ -636,7 +645,7 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double>
combiningRuleParams.push_back(""); combiningRuleParams.push_back("");
} }
for (int i = 0; i < paramNames.size(); i++) for (int i = 0; i < paramNames.size(); i++)
paramExpressions[i] = createExpression<64>(combiningRules[i], Lepton::Parser::parse(combiningRules[i]).optimize().createProgram(), combiningRuleParams, globalParamNames); paramExpressions[i] = createExpression<64>(combiningRules[i], Lepton::Parser::parse(combiningRules[i]).optimize().createProgram(), combiningRuleParams, globalParamNames, gpu->sim.customExpressionStackSize);
SetCustomNonbondedCombiningRules(paramExpressions); SetCustomNonbondedCombiningRules(paramExpressions);
} }
......
...@@ -277,16 +277,24 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid) ...@@ -277,16 +277,24 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
{ {
// printf("kCalculateCustomNonbondedCutoffForces\n"); // printf("kCalculateCustomNonbondedCutoffForces\n");
CUDPPResult result; CUDPPResult result;
int sharedPerThread = sizeof(Atom)+gpu->sim.customExpressionStackSize*sizeof(float);
if (gpu->sim.customNonbondedMethod != NO_CUTOFF)
sharedPerThread += sizeof(float3);
int threads = gpu->sim.nonbond_threads_per_block;
int maxThreads = 16380/sharedPerThread;
if (threads > maxThreads)
threads = (maxThreads/32)*32;
switch (gpu->sim.customNonbondedMethod) switch (gpu->sim.customNonbondedMethod)
{ {
case NO_CUTOFF: case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCustomNonbondedN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pWorkUnit);
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else else
kCalculateCustomNonbondedN2Forces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCustomNonbondedN2Forces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pWorkUnit);
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedN2Forces"); LAUNCHERROR("kCalculateCustomNonbondedN2Forces");
kCalculateCustomNonbondedN2Exceptions_kernel<<<gpu->sim.blocks, gpu->sim.custom_exception_threads_per_block,
gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
LAUNCHERROR("kCalculateCustomNonbondedN2Exceptions");
break; break;
case CUTOFF: case CUTOFF:
if (!neighborListValid) if (!neighborListValid)
...@@ -306,12 +314,13 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid) ...@@ -306,12 +314,13 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
} }
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedCutoffByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCustomNonbondedCutoffByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateCustomNonbondedCutoffForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCustomNonbondedCutoffForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedCutoffForces"); LAUNCHERROR("kCalculateCustomNonbondedCutoffForces");
kCalculateCustomNonbondedCutoffExceptions_kernel<<<gpu->sim.blocks, gpu->sim.custom_exception_threads_per_block,
gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
LAUNCHERROR("kCalculateCustomNonbondedCutoffExceptions");
break; break;
case PERIODIC: case PERIODIC:
if (!neighborListValid) if (!neighborListValid)
...@@ -331,12 +340,13 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid) ...@@ -331,12 +340,13 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
} }
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedPeriodicByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCustomNonbondedPeriodicByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateCustomNonbondedPeriodicForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCustomNonbondedPeriodicForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedPeriodicForces"); LAUNCHERROR("kCalculateCustomNonbondedPeriodicForces");
kCalculateCustomNonbondedPeriodicExceptions_kernel<<<gpu->sim.blocks, gpu->sim.custom_exception_threads_per_block,
gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
LAUNCHERROR("kCalculateCustomNonbondedPeriodicExceptions");
break; break;
} }
} }
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
/** /**
* This file contains the kernels for evaluating nonbonded forces. It is included * This file contains the kernels for evaluating custom nonbonded forces. It is included
* several times in kCalculateCustomNonbondedForces.cu with different #defines to generate * several times in kCalculateCustomNonbondedForces.cu with different #defines to generate
* different versions of the kernels. * different versions of the kernels.
*/ */
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
__global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned int* workUnit) __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned int* workUnit)
{ {
extern __shared__ float stack[]; extern __shared__ float stack[];
Atom* sA = (Atom*) &stack[MAX_STACK_SIZE*blockDim.x]; Atom* sA = (Atom*) &stack[cSim.customExpressionStackSize*blockDim.x];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID; unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
...@@ -82,7 +82,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -82,7 +82,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[j].params); float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -112,8 +112,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -112,8 +112,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff) if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else #else
...@@ -186,7 +186,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -186,7 +186,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[tj].params); float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -216,8 +216,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -216,8 +216,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff) if (r > cSim.nonbondedCutoff)
{ {
...@@ -252,7 +252,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -252,7 +252,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[j].params); float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -282,8 +282,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -282,8 +282,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff) if (r > cSim.nonbondedCutoff)
{ {
...@@ -354,7 +354,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -354,7 +354,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[MAX_STACK_SIZE*threadIdx.x], 0.0f, params, psA[tj].params); float value = kEvaluateExpression_kernel(&combiningRules[0], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -384,8 +384,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -384,8 +384,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[MAX_STACK_SIZE*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff) if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else #else
...@@ -445,3 +445,56 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -445,3 +445,56 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
} }
cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += totalEnergy; cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += totalEnergy;
} }
/**
 * Evaluate the custom nonbonded exceptions.
 *
 * Each thread starts at its global thread index and advances through the
 * exception list by the total number of launched threads, so any launch
 * configuration covers all cSim.customExceptions entries.
 *
 * For every exception the kernel reads the two atom positions from cSim.pPosq,
 * evaluates forceExp and energyExp at the pair distance using the exception's
 * own parameters, adds the resulting force to / subtracts it from two entries
 * of cSim.pForce4, and accumulates the energy.  The per-thread energy sum is
 * added to cSim.pEnergy at the thread's global index.
 *
 * Dynamic shared memory: the expression evaluator is handed the slice of
 * `stack` that begins at cSim.customExpressionStackSize*threadIdx.x, so the
 * launch is expected to supply cSim.customExpressionStackSize floats per thread.
 *
 * NOTE(review): the force update is a plain (non-atomic) read-modify-write.
 * Presumably atom.z / atom.w select output buffers chosen so that concurrent
 * exceptions never share a slot -- confirm against the code that fills
 * pCustomExceptionID.
 */
__global__ void METHOD_NAME(kCalculateCustomNonbonded, Exceptions_kernel)()
{
    extern __shared__ float stack[];

    // Start of this thread's private region of the shared evaluation stack.
    const unsigned int stackBase = cSim.customExpressionStackSize*threadIdx.x;
    const unsigned int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int step = blockDim.x * gridDim.x;
    float energySum = 0.0f;

    for (unsigned int index = threadIndex; index < cSim.customExceptions; index += step)
    {
        // x, y: indices of the two atoms; z, w: multiplied by cSim.stride to locate the force entries.
        int4 ids = cSim.pCustomExceptionID[index];
        float4 exceptionParams = cSim.pCustomExceptionParams[index];
        float4 posA = cSim.pPosq[ids.x];
        float4 posB = cSim.pPosq[ids.y];

        // Displacement from the second atom to the first.
        float dx = posA.x - posB.x;
        float dy = posA.y - posB.y;
        float dz = posA.z - posB.z;
#ifdef USE_PERIODIC
        // Wrap each component by a whole number of box lengths.
        dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
        dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
        dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
        float r = sqrt(dx*dx + dy*dy + dz*dz);
        float invR = 1.0f/r;

        // The exception parameters are passed for both parameter arguments of the evaluator.
        float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[stackBase], r, exceptionParams, exceptionParams)*invR;
        float pairEnergy = kEvaluateExpression_kernel(&energyExp, &stack[stackBase], r, exceptionParams, exceptionParams);
#ifdef USE_CUTOFF
        // Pairs farther apart than the cutoff contribute nothing.
        if (r > cSim.nonbondedCutoff)
        {
            dEdR = 0.0f;
            pairEnergy = 0.0f;
        }
#endif
        energySum += pairEnergy;

        // Scale the displacement into the force contribution.
        const float fx = dx*dEdR;
        const float fy = dy*dEdR;
        const float fz = dz*dEdR;

        // Load both force entries before storing either one.
        const unsigned int slotA = ids.x + ids.z * cSim.stride;
        const unsigned int slotB = ids.y + ids.w * cSim.stride;
        float4 accumA = cSim.pForce4[slotA];
        float4 accumB = cSim.pForce4[slotB];
        accumA.x += fx;
        accumA.y += fy;
        accumA.z += fz;
        accumB.x -= fx;
        accumB.y -= fy;
        accumB.z -= fz;
        cSim.pForce4[slotA] = accumA;
        cSim.pForce4[slotB] = accumB;
    }

    cSim.pEnergy[threadIndex] += energySum;
}
...@@ -206,7 +206,7 @@ int main() { ...@@ -206,7 +206,7 @@ int main() {
try { try {
testSimpleExpression(); testSimpleExpression();
testParameters(); testParameters();
// testExceptions(); testExceptions();
testCutoff(); testCutoff();
testPeriodic(); testPeriodic();
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment