Commit 77f2d93b authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimized CUDA implementation of CustomNonbondedForce

parent 39ca36ee
......@@ -249,8 +249,8 @@ enum CudaNonbondedMethod
};
enum ExpressionOp {
CONSTANT = 0, VARIABLE0, VARIABLE1, VARIABLE2, VARIABLE3, VARIABLE4, VARIABLE5, VARIABLE6, VARIABLE7, VARIABLE8, GLOBAL, CUSTOM, CUSTOM_DERIV, ADD, SUBTRACT, MULTIPLY, DIVIDE,
POWER, NEGATE, SQRT, EXP, LOG, SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN, SQUARE, CUBE, RECIPROCAL, ADD_CONSTANT, MULTIPLY_CONSTANT, POWER_CONSTANT
VARIABLE0 = 0, VARIABLE1, VARIABLE2, VARIABLE3, VARIABLE4, VARIABLE5, VARIABLE6, VARIABLE7, MULTIPLY, DIVIDE, ADD, SUBTRACT, POWER, MULTIPLY_CONSTANT, POWER_CONSTANT, ADD_CONSTANT,
GLOBAL, CONSTANT, CUSTOM, CUSTOM_DERIV, NEGATE, RECIPROCAL, SQRT, EXP, LOG, SQUARE, CUBE, SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN
};
template<int SIZE>
......
......@@ -168,8 +168,6 @@ static Expression<SIZE> createExpression(gpuContext gpu, const string& expressio
exp.op[i] = VARIABLE6;
else if (variables.size() > 7 && op.getName() == variables[7])
exp.op[i] = VARIABLE7;
else if (variables.size() > 8 && op.getName() == variables[8])
exp.op[i] = VARIABLE8;
else {
int j;
for (j = 0; j < globalParamNames.size() && op.getName() != globalParamNames[j]; j++);
......@@ -731,7 +729,6 @@ void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double>
SetCustomNonbondedForceExpression(createExpression<128>(gpu, energyExp, Lepton::Parser::parse(energyExp, functions).differentiate("r").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
Expression<64> paramExpressions[4];
vector<string> combiningRuleParams;
combiningRuleParams.push_back("");
for (int j = 1; j < 3; j++) {
for (int i = 0; i < paramNames.size(); i++) {
stringstream name;
......
......@@ -103,56 +103,59 @@ void SetCustomNonbondedGlobalParams(float* paramValues)
}
#define STACK(y) stack[(y)*blockDim.x+threadIdx.x]
#define VARIABLE(y) variables[(y)*blockDim.x+threadIdx.x]
template<int SIZE>
__device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float* stack, float var0, float4 vars1, float4 vars2)
__device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float* stack, float* variables)
{
int stackPointer = -1;
for (int i = 0; i < expression->length; i++)
{
int op = expression->op[i];
if (op < SQRT) {
if (op < VARIABLE8) {
if (op < VARIABLE4) {
if (op == CONSTANT) {
STACK(++stackPointer) = expression->arg[i];
}
else if (op == VARIABLE0) {
STACK(++stackPointer) = var0;
if (op < MULTIPLY) {
STACK(++stackPointer) = VARIABLE(op-VARIABLE0);
}
else if (op == VARIABLE1) {
STACK(++stackPointer) = vars1.x;
else if (op < NEGATE) {
if (op < MULTIPLY_CONSTANT) {
if (op == MULTIPLY) {
float temp = STACK(stackPointer);
STACK(--stackPointer) *= temp;
}
else if (op == VARIABLE2) {
STACK(++stackPointer) = vars1.y;
else if (op == DIVIDE) {
float temp = STACK(stackPointer);
STACK(stackPointer) = temp/STACK(--stackPointer);
}
else if (op == VARIABLE3) {
STACK(++stackPointer) = vars1.z;
else if (op == ADD) {
float temp = STACK(stackPointer);
STACK(--stackPointer) += temp;
}
else if (op == SUBTRACT) {
float temp = STACK(stackPointer);
STACK(stackPointer) = temp-STACK(--stackPointer);
}
else {
if (op == VARIABLE4) {
STACK(++stackPointer) = vars1.w;
else if (op == POWER) {
float temp = STACK(stackPointer);
STACK(stackPointer) = pow(temp, STACK(--stackPointer));
}
else if (op == VARIABLE5) {
STACK(++stackPointer) = vars2.x;
}
else if (op == VARIABLE6) {
STACK(++stackPointer) = vars2.y;
else if (op < GLOBAL) {
if (op == MULTIPLY_CONSTANT) {
STACK(stackPointer) *= expression->arg[i];
}
else if (op == VARIABLE7) {
STACK(++stackPointer) = vars2.z;
else if (op == POWER_CONSTANT) {
STACK(stackPointer) = pow(STACK(stackPointer), expression->arg[i]);
}
else if (op == ADD_CONSTANT) {
STACK(stackPointer) += expression->arg[i];
}
}
else {
if (op < MULTIPLY) {
if (op == VARIABLE8) {
STACK(++stackPointer) = vars2.w;
}
else if (op == GLOBAL) {
if (op == GLOBAL) {
STACK(++stackPointer) = globalParams[(int) expression->arg[i]];
}
else if (op == CONSTANT) {
STACK(++stackPointer) = expression->arg[i];
}
else if (op == CUSTOM || op == CUSTOM_DERIV) {
int function = (int) expression->arg[i];
float x = STACK(stackPointer);
......@@ -178,38 +181,17 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
STACK(stackPointer) = (coeff.y+x*(2.0f*coeff.z+x*3.0f*coeff.w))*params.z;
}
}
else if (op == ADD) {
float temp = STACK(stackPointer);
STACK(--stackPointer) += temp;
}
else if (op == SUBTRACT) {
float temp = STACK(stackPointer);
STACK(stackPointer) = temp-STACK(--stackPointer);
}
}
else {
if (op == MULTIPLY) {
float temp = STACK(stackPointer);
STACK(--stackPointer) *= temp;
}
else if (op == DIVIDE) {
float temp = STACK(stackPointer);
STACK(stackPointer) = temp/STACK(--stackPointer);
}
else if (op == POWER) {
float temp = STACK(stackPointer);
STACK(stackPointer) = pow(temp, STACK(--stackPointer));
}
else if (op == NEGATE) {
if (op < SIN) {
if (op == NEGATE) {
STACK(stackPointer) *= -1.0f;
}
else if (op == RECIPROCAL) {
STACK(stackPointer) = 1.0f/STACK(stackPointer);
}
}
}
else {
if (op < ASIN) {
if (op < SEC) {
if (op == SQRT) {
else if (op == SQRT) {
STACK(stackPointer) = sqrt(STACK(stackPointer));
}
else if (op == EXP) {
......@@ -218,15 +200,23 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
else if (op == LOG) {
STACK(stackPointer) = log(STACK(stackPointer));
}
else if (op == SIN) {
else if (op == SQUARE) {
float temp = STACK(stackPointer);
STACK(stackPointer) *= temp;
}
else if (op == CUBE) {
float temp = STACK(stackPointer);
STACK(stackPointer) *= temp*temp;
}
}
else {
if (op == SIN) {
STACK(stackPointer) = sin(STACK(stackPointer));
}
else if (op == COS) {
STACK(stackPointer) = cos(STACK(stackPointer));
}
}
else {
if (op == SEC) {
else if (op == SEC) {
STACK(stackPointer) = 1.0f/cos(STACK(stackPointer));
}
else if (op == CSC) {
......@@ -238,11 +228,7 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
else if (op == COT) {
STACK(stackPointer) = 1.0f/tan(STACK(stackPointer));
}
}
}
else {
if (op < RECIPROCAL) {
if (op == ASIN) {
else if (op == ASIN) {
STACK(stackPointer) = asin(STACK(stackPointer));
}
else if (op == ACOS) {
......@@ -251,29 +237,6 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
else if (op == ATAN) {
STACK(stackPointer) = atan(STACK(stackPointer));
}
else if (op == SQUARE) {
float temp = STACK(stackPointer);
STACK(stackPointer) *= temp;
}
else if (op == CUBE) {
float temp = STACK(stackPointer);
STACK(stackPointer) *= temp*temp;
}
}
else {
if (op == RECIPROCAL) {
STACK(stackPointer) = 1.0f/STACK(stackPointer);
}
else if (op == ADD_CONSTANT) {
STACK(stackPointer) += expression->arg[i];
}
else if (op == MULTIPLY_CONSTANT) {
STACK(stackPointer) *= expression->arg[i];
}
else if (op == POWER_CONSTANT) {
STACK(stackPointer) = pow(STACK(stackPointer), expression->arg[i]);
}
}
}
}
}
......@@ -336,7 +299,7 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
cudaBindTexture(NULL, &texRef3, gpu->tabulatedFunctions[3].coefficients->_pDevData, &channelDesc, gpu->tabulatedFunctions[3].coefficients->_length*sizeof(float4));
gpu->tabulatedFunctionsChanged = false;
}
int sharedPerThread = sizeof(Atom)+gpu->sim.customExpressionStackSize*sizeof(float);
int sharedPerThread = sizeof(Atom)+gpu->sim.customExpressionStackSize*sizeof(float)+8*sizeof(float);
if (gpu->sim.customNonbondedMethod != NO_CUTOFF)
sharedPerThread += sizeof(float3);
int threads = gpu->sim.nonbond_threads_per_block;
......@@ -392,7 +355,7 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
kCalculateCustomNonbondedPeriodicForces_kernel<<<gpu->sim.nonbond_blocks, threads, sharedPerThread*threads>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedPeriodicForces");
kCalculateCustomNonbondedPeriodicExceptions_kernel<<<gpu->sim.blocks, gpu->sim.custom_exception_threads_per_block,
gpu->sim.customExpressionStackSize*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
(gpu->sim.customExpressionStackSize+8)*sizeof(float)*gpu->sim.custom_exception_threads_per_block>>>();
LAUNCHERROR("kCalculateCustomNonbondedPeriodicExceptions");
break;
}
......
......@@ -34,6 +34,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
{
extern __shared__ float stack[];
Atom* sA = (Atom*) &stack[cSim.customExpressionStackSize*blockDim.x];
float* variables = (float*) &sA[blockDim.x];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
......@@ -79,26 +80,38 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
{
// Apply the combining rules to the parameters.
float4 combinedParams = make_float4(0, 0, 0, 0);
float combinedParams[4];
VARIABLE(0) = params.x;
VARIABLE(1) = params.y;
VARIABLE(2) = params.z;
VARIABLE(3) = params.w;
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[j].params);
VARIABLE(4) = psA[j].params.x;
VARIABLE(5) = psA[j].params.y;
VARIABLE(6) = psA[j].params.z;
VARIABLE(7) = psA[j].params.w;
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, variables);
switch (k)
{
case 0:
combinedParams.x = value;
combinedParams[0] = value;
break;
case 1:
combinedParams.y = value;
combinedParams[1] = value;
break;
case 2:
combinedParams.z = value;
combinedParams[2] = value;
break;
case 3:
combinedParams.w = value;
combinedParams[3] = value;
break;
}
}
VARIABLE(1) = combinedParams[0];
VARIABLE(2) = combinedParams[1];
VARIABLE(3) = combinedParams[2];
VARIABLE(4) = combinedParams[3];
// Compute the force.
......@@ -112,8 +125,9 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
VARIABLE(0) = r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, variables)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, variables);
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
......@@ -183,26 +197,38 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
{
// Apply the combining rules to the parameters.
float4 combinedParams = make_float4(0, 0, 0, 0);
float combinedParams[4];
VARIABLE(0) = params.x;
VARIABLE(1) = params.y;
VARIABLE(2) = params.z;
VARIABLE(3) = params.w;
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[tj].params);
VARIABLE(4) = psA[j].params.x;
VARIABLE(5) = psA[j].params.y;
VARIABLE(6) = psA[j].params.z;
VARIABLE(7) = psA[j].params.w;
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, variables);
switch (k)
{
case 0:
combinedParams.x = value;
combinedParams[0] = value;
break;
case 1:
combinedParams.y = value;
combinedParams[1] = value;
break;
case 2:
combinedParams.z = value;
combinedParams[2] = value;
break;
case 3:
combinedParams.w = value;
combinedParams[3] = value;
break;
}
}
VARIABLE(1) = combinedParams[0];
VARIABLE(2) = combinedParams[1];
VARIABLE(3) = combinedParams[2];
VARIABLE(4) = combinedParams[3];
// Compute the force.
......@@ -216,8 +242,9 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
VARIABLE(0) = r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, variables)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, variables);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......@@ -249,26 +276,38 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
{
// Apply the combining rules to the parameters.
float4 combinedParams = make_float4(0, 0, 0, 0);
float combinedParams[4];
VARIABLE(0) = params.x;
VARIABLE(1) = params.y;
VARIABLE(2) = params.z;
VARIABLE(3) = params.w;
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[j].params);
VARIABLE(4) = psA[j].params.x;
VARIABLE(5) = psA[j].params.y;
VARIABLE(6) = psA[j].params.z;
VARIABLE(7) = psA[j].params.w;
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, variables);
switch (k)
{
case 0:
combinedParams.x = value;
combinedParams[0] = value;
break;
case 1:
combinedParams.y = value;
combinedParams[1] = value;
break;
case 2:
combinedParams.z = value;
combinedParams[2] = value;
break;
case 3:
combinedParams.w = value;
combinedParams[3] = value;
break;
}
}
VARIABLE(1) = combinedParams[0];
VARIABLE(2) = combinedParams[1];
VARIABLE(3) = combinedParams[2];
VARIABLE(4) = combinedParams[3];
// Compute the force.
......@@ -282,8 +321,9 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
VARIABLE(0) = r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, variables)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, variables);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......@@ -351,26 +391,38 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
{
// Apply the combining rules to the parameters.
float4 combinedParams = make_float4(0, 0, 0, 0);
float combinedParams[4];
VARIABLE(0) = params.x;
VARIABLE(1) = params.y;
VARIABLE(2) = params.z;
VARIABLE(3) = params.w;
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[tj].params);
VARIABLE(4) = psA[j].params.x;
VARIABLE(5) = psA[j].params.y;
VARIABLE(6) = psA[j].params.z;
VARIABLE(7) = psA[j].params.w;
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, variables);
switch (k)
{
case 0:
combinedParams.x = value;
combinedParams[0] = value;
break;
case 1:
combinedParams.y = value;
combinedParams[1] = value;
break;
case 2:
combinedParams.z = value;
combinedParams[2] = value;
break;
case 3:
combinedParams.w = value;
combinedParams[3] = value;
break;
}
}
VARIABLE(1) = combinedParams[0];
VARIABLE(2) = combinedParams[1];
VARIABLE(3) = combinedParams[2];
VARIABLE(4) = combinedParams[3];
// Compute the force.
......@@ -384,8 +436,9 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
VARIABLE(0) = r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, variables)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, variables);
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
......@@ -450,6 +503,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
__global__ void METHOD_NAME(kCalculateCustomNonbonded, Exceptions_kernel)()
{
extern __shared__ float stack[];
float* variables = (float*) &stack[cSim.customExpressionStackSize*blockDim.x];
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
float totalEnergy = 0.0f;
......@@ -469,8 +523,13 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Exceptions_kernel)()
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, params, params)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, params, params);
VARIABLE(0) = r;
VARIABLE(1) = params.x;
VARIABLE(2) = params.y;
VARIABLE(3) = params.z;
VARIABLE(4) = params.w;
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, variables)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, variables);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment