Commit 41bc0b2d authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to CustomNonbondedForce

parent 76fd2d1f
...@@ -97,6 +97,8 @@ void SetCustomNonbondedGlobalParams(const vector<float>& paramValues) ...@@ -97,6 +97,8 @@ void SetCustomNonbondedGlobalParams(const vector<float>& paramValues)
RTERROR(status, "SetCustomNonbondedGlobalParams: cudaMemcpyToSymbol failed"); RTERROR(status, "SetCustomNonbondedGlobalParams: cudaMemcpyToSymbol failed");
} }
#define STACK(y) stack[(y)*blockDim.x+threadIdx.x]
template<int SIZE> template<int SIZE>
__device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float* stack, float var0, float4 vars1, float4 vars2) __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float* stack, float var0, float4 vars1, float4 vars2)
{ {
...@@ -106,131 +108,131 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float* ...@@ -106,131 +108,131 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
switch (expression->op[i]) switch (expression->op[i])
{ {
case CONSTANT: case CONSTANT:
stack[++stackPointer] = expression->arg[i]; STACK(++stackPointer) = expression->arg[i];
break; break;
case VARIABLE0: case VARIABLE0:
stack[++stackPointer] = var0; STACK(++stackPointer) = var0;
break; break;
case VARIABLE1: case VARIABLE1:
stack[++stackPointer] = vars1.x; STACK(++stackPointer) = vars1.x;
break; break;
case VARIABLE2: case VARIABLE2:
stack[++stackPointer] = vars1.y; STACK(++stackPointer) = vars1.y;
break; break;
case VARIABLE3: case VARIABLE3:
stack[++stackPointer] = vars1.z; STACK(++stackPointer) = vars1.z;
break; break;
case VARIABLE4: case VARIABLE4:
stack[++stackPointer] = vars1.w; STACK(++stackPointer) = vars1.w;
break; break;
case VARIABLE5: case VARIABLE5:
stack[++stackPointer] = vars2.x; STACK(++stackPointer) = vars2.x;
break; break;
case VARIABLE6: case VARIABLE6:
stack[++stackPointer] = vars2.y; STACK(++stackPointer) = vars2.y;
break; break;
case VARIABLE7: case VARIABLE7:
stack[++stackPointer] = vars2.z; STACK(++stackPointer) = vars2.z;
break; break;
case VARIABLE8: case VARIABLE8:
stack[++stackPointer] = vars2.w; STACK(++stackPointer) = vars2.w;
break; break;
case GLOBAL: case GLOBAL:
stack[++stackPointer] = globalParams[(int) expression->arg[i]]; STACK(++stackPointer) = globalParams[(int) expression->arg[i]];
break; break;
case ADD: case ADD:
{ {
float temp = stack[stackPointer]; float temp = STACK(stackPointer);
stack[stackPointer] = temp+stack[--stackPointer]; STACK(--stackPointer) += temp;
break; break;
} }
case SUBTRACT: case SUBTRACT:
{ {
float temp = stack[stackPointer]; float temp = STACK(stackPointer);
stack[stackPointer] = temp-stack[--stackPointer]; STACK(stackPointer) = temp-STACK(--stackPointer);
break; break;
} }
case MULTIPLY: case MULTIPLY:
{ {
float temp = stack[stackPointer]; float temp = STACK(stackPointer);
stack[stackPointer] = temp*stack[--stackPointer]; STACK(--stackPointer) *= temp;
break; break;
} }
case DIVIDE: case DIVIDE:
{ {
float temp = stack[stackPointer]; float temp = STACK(stackPointer);
stack[stackPointer] = temp/stack[--stackPointer]; STACK(stackPointer) = temp/STACK(--stackPointer);
break; break;
} }
case POWER: case POWER:
{ {
float temp = stack[stackPointer]; float temp = STACK(stackPointer);
stack[stackPointer] = pow(temp, stack[--stackPointer]); STACK(stackPointer) = pow(temp, STACK(--stackPointer));
break; break;
} }
case NEGATE: case NEGATE:
stack[stackPointer] = -stack[stackPointer]; STACK(stackPointer) *= -1.0f;
break; break;
case SQRT: case SQRT:
stack[stackPointer] = sqrt(stack[stackPointer]); STACK(stackPointer) = sqrt(STACK(stackPointer));
break; break;
case EXP: case EXP:
stack[stackPointer] = exp(stack[stackPointer]); STACK(stackPointer) = exp(STACK(stackPointer));
break; break;
case LOG: case LOG:
stack[stackPointer] = log(stack[stackPointer]); STACK(stackPointer) = log(STACK(stackPointer));
break; break;
case SIN: case SIN:
stack[stackPointer] = sin(stack[stackPointer]); STACK(stackPointer) = sin(STACK(stackPointer));
break; break;
case COS: case COS:
stack[stackPointer] = cos(stack[stackPointer]); STACK(stackPointer) = cos(STACK(stackPointer));
break; break;
case SEC: case SEC:
stack[stackPointer] = 1.0f/cos(stack[stackPointer]); STACK(stackPointer) = 1.0f/cos(STACK(stackPointer));
break; break;
case CSC: case CSC:
stack[stackPointer] = 1.0f/sin(stack[stackPointer]); STACK(stackPointer) = 1.0f/sin(STACK(stackPointer));
break; break;
case TAN: case TAN:
stack[stackPointer] = tan(stack[stackPointer]); STACK(stackPointer) = tan(STACK(stackPointer));
break; break;
case COT: case COT:
stack[stackPointer] = 1.0f/tan(stack[stackPointer]); STACK(stackPointer) = 1.0f/tan(STACK(stackPointer));
break; break;
case ASIN: case ASIN:
stack[stackPointer] = asin(stack[stackPointer]); STACK(stackPointer) = asin(STACK(stackPointer));
break; break;
case ACOS: case ACOS:
stack[stackPointer] = acos(stack[stackPointer]); STACK(stackPointer) = acos(STACK(stackPointer));
break; break;
case ATAN: case ATAN:
stack[stackPointer] = atan(stack[stackPointer]); STACK(stackPointer) = atan(STACK(stackPointer));
break; break;
case SQUARE: case SQUARE:
{ {
float temp = stack[stackPointer]; float temp = STACK(stackPointer);
stack[stackPointer] = temp*temp; STACK(stackPointer) *= temp;
break; break;
} }
case CUBE: case CUBE:
{ {
float temp = stack[stackPointer]; float temp = STACK(stackPointer);
stack[stackPointer] = temp*temp*temp; STACK(stackPointer) *= temp*temp;
break; break;
} }
case RECIPROCAL: case RECIPROCAL:
stack[stackPointer] = 1.0f/stack[stackPointer]; STACK(stackPointer) = 1.0f/STACK(stackPointer);
break; break;
case INCREMENT: case INCREMENT:
stack[stackPointer] = stack[stackPointer]+1.0f; STACK(stackPointer) += 1.0f;
break; break;
case DECREMENT: case DECREMENT:
stack[stackPointer] = stack[stackPointer]-1.0f; STACK(stackPointer) -= 1.0f;
break; break;
} }
} }
return stack[stackPointer]; return STACK(stackPointer);
} }
// Include versions of the kernels for N^2 calculations. // Include versions of the kernels for N^2 calculations.
......
...@@ -82,7 +82,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -82,7 +82,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params); float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[j].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -112,8 +112,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -112,8 +112,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff) if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else #else
...@@ -186,7 +186,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -186,7 +186,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params); float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[tj].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -216,8 +216,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -216,8 +216,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff) if (r > cSim.nonbondedCutoff)
{ {
...@@ -252,7 +252,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -252,7 +252,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[j].params); float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[j].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -282,8 +282,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -282,8 +282,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff) if (r > cSim.nonbondedCutoff)
{ {
...@@ -354,7 +354,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -354,7 +354,7 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
float4 combinedParams = make_float4(0, 0, 0, 0); float4 combinedParams = make_float4(0, 0, 0, 0);
for (int k = 0; k < cSim.customParameters; k++) for (int k = 0; k < cSim.customParameters; k++)
{ {
float value = kEvaluateExpression_kernel(&combiningRules[k], &stack[cSim.customExpressionStackSize*threadIdx.x], 0.0f, params, psA[tj].params); float value = kEvaluateExpression_kernel(&combiningRules[k], stack, 0.0f, params, psA[tj].params);
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -384,8 +384,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i ...@@ -384,8 +384,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Forces_kernel)(unsigned i
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, combinedParams, combinedParams)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, combinedParams, combinedParams); float energy = kEvaluateExpression_kernel(&energyExp, stack, r, combinedParams, combinedParams);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (!(excl & 0x1) || r > cSim.nonbondedCutoff) if (!(excl & 0x1) || r > cSim.nonbondedCutoff)
#else #else
...@@ -469,8 +469,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Exceptions_kernel)() ...@@ -469,8 +469,8 @@ __global__ void METHOD_NAME(kCalculateCustomNonbonded, Exceptions_kernel)()
#endif #endif
float r = sqrt(dx*dx + dy*dy + dz*dz); float r = sqrt(dx*dx + dy*dy + dz*dz);
float invR = 1.0f/r; float invR = 1.0f/r;
float dEdR = -kEvaluateExpression_kernel(&forceExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, params, params)*invR; float dEdR = -kEvaluateExpression_kernel(&forceExp, stack, r, params, params)*invR;
float energy = kEvaluateExpression_kernel(&energyExp, &stack[cSim.customExpressionStackSize*threadIdx.x], r, params, params); float energy = kEvaluateExpression_kernel(&energyExp, stack, r, params, params);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (r > cSim.nonbondedCutoff) if (r > cSim.nonbondedCutoff)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment