Commit 41bc0b2d authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to CustomNonbondedForce

parent 76fd2d1f
......@@ -97,6 +97,8 @@ void SetCustomNonbondedGlobalParams(const vector<float>& paramValues)
RTERROR(status, "SetCustomNonbondedGlobalParams: cudaMemcpyToSymbol failed");
}
#define STACK(y) stack[(y)*blockDim.x+threadIdx.x]
template<int SIZE>
__device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float* stack, float var0, float4 vars1, float4 vars2)
{
......@@ -106,131 +108,131 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
switch (expression->op[i])
{
case CONSTANT:
stack[++stackPointer] = expression->arg[i];
STACK(++stackPointer) = expression->arg[i];
break;
case VARIABLE0:
stack[++stackPointer] = var0;
STACK(++stackPointer) = var0;
break;
case VARIABLE1:
stack[++stackPointer] = vars1.x;
STACK(++stackPointer) = vars1.x;
break;
case VARIABLE2:
stack[++stackPointer] = vars1.y;
STACK(++stackPointer) = vars1.y;
break;
case VARIABLE3:
stack[++stackPointer] = vars1.z;
STACK(++stackPointer) = vars1.z;
break;
case VARIABLE4:
stack[++stackPointer] = vars1.w;
STACK(++stackPointer) = vars1.w;
break;
case VARIABLE5:
stack[++stackPointer] = vars2.x;
STACK(++stackPointer) = vars2.x;
break;
case VARIABLE6:
stack[++stackPointer] = vars2.y;
STACK(++stackPointer) = vars2.y;
break;
case VARIABLE7:
stack[++stackPointer] = vars2.z;
STACK(++stackPointer) = vars2.z;
break;
case VARIABLE8:
stack[++stackPointer] = vars2.w;
STACK(++stackPointer) = vars2.w;
break;
case GLOBAL:
stack[++stackPointer] = globalParams[(int) expression->arg[i]];
STACK(++stackPointer) = globalParams[(int) expression->arg[i]];
break;
case ADD:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp+stack[--stackPointer];
float temp = STACK(stackPointer);
STACK(--stackPointer) += temp;
break;
}
case SUBTRACT:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp-stack[--stackPointer];
float temp = STACK(stackPointer);
STACK(stackPointer) = temp-STACK(--stackPointer);
break;
}
case MULTIPLY:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*stack[--stackPointer];
float temp = STACK(stackPointer);
STACK(--stackPointer) *= temp;
break;
}
case DIVIDE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp/stack[--stackPointer];
float temp = STACK(stackPointer);
STACK(stackPointer) = temp/STACK(--stackPointer);
break;
}
case POWER:
{
float temp = stack[stackPointer];
stack[stackPointer] = pow(temp, stack[--stackPointer]);
float temp = STACK(stackPointer);
STACK(stackPointer) = pow(temp, STACK(--stackPointer));
break;
}
case NEGATE:
stack[stackPointer] = -stack[stackPointer];
STACK(stackPointer) *= -1.0f;
break;
case SQRT:
stack[stackPointer] = sqrt(stack[stackPointer]);
STACK(stackPointer) = sqrt(STACK(stackPointer));
break;
case EXP:
stack[stackPointer] = exp(stack[stackPointer]);
STACK(stackPointer) = exp(STACK(stackPointer));
break;
case LOG:
stack[stackPointer] = log(stack[stackPointer]);
STACK(stackPointer) = log(STACK(stackPointer));
break;
case SIN:
stack[stackPointer] = sin(stack[stackPointer]);
STACK(stackPointer) = sin(STACK(stackPointer));
break;
case COS:
stack[stackPointer] = cos(stack[stackPointer]);
STACK(stackPointer) = cos(STACK(stackPointer));
break;
case SEC:
stack[stackPointer] = 1.0f/cos(stack[stackPointer]);
STACK(stackPointer) = 1.0f/cos(STACK(stackPointer));
break;
case CSC:
stack[stackPointer] = 1.0f/sin(stack[stackPointer]);
STACK(stackPointer) = 1.0f/sin(STACK(stackPointer));
break;
case TAN:
stack[stackPointer] = tan(stack[stackPointer]);
STACK(stackPointer) = tan(STACK(stackPointer));
break;
case COT:
stack[stackPointer] = 1.0f/tan(stack[stackPointer]);
STACK(stackPointer) = 1.0f/tan(STACK(stackPointer));
break;
case ASIN:
stack[stackPointer] = asin(stack[stackPointer]);
STACK(stackPointer) = asin(STACK(stackPointer));
break;
case ACOS:
stack[stackPointer] = acos(stack[stackPointer]);
STACK(stackPointer) = acos(STACK(stackPointer));
break;
case ATAN:
stack[stackPointer] = atan(stack[stackPointer]);
STACK(stackPointer) = atan(STACK(stackPointer));
break;
case SQUARE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*temp;
float temp = STACK(stackPointer);
STACK(stackPointer) *= temp;
break;
}
case CUBE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*temp*temp;
float temp = STACK(stackPointer);
STACK(stackPointer) *= temp*temp;
break;
}
case RECIPROCAL:
stack[stackPointer] = 1.0f/stack[stackPointer];
STACK(stackPointer) = 1.0f/STACK(stackPointer);
break;
case INCREMENT:
stack[stackPointer] = stack[stackPointer]+1.0f;
STACK(stackPointer) += 1.0f;
break;
case DECREMENT:
stack[stackPointer] = stack[stackPointer]-1.0f;
STACK(stackPointer) -= 1.0f;
break;
}
}
return stack[stackPointer];
return STACK(stackPointer);
}
// Include versions of the kernels for N^2 calculations.
......
......@@ -82,7 +82,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params);
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[j].params);
switch (k)
{
case 0:
......@@ -112,8 +112,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
......@@ -186,7 +186,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params);
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[tj].params);
switch (k)
{
case 0:
......@@ -216,8 +216,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......@@ -252,7 +252,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params);
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[j].params);
switch (k)
{
case 0:
......@@ -282,8 +282,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......@@ -354,7 +354,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++)
{
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params);
float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[tj].params);
switch (k)
{
case 0:
......@@ -384,8 +384,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams);
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else
......@@ -469,8 +469,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Exceptions_kernel)()
#endif
float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, params, params)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, params, params);
float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, params, params)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, stack, r, params, params);
#ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment