"platforms/reference/vscode:/vscode.git/clone" did not exist on "c4d89122d17f1a0c6f6f246a9b86f6aa55cab6b2"
Commit e009228a authored by Peter Eastman's avatar Peter Eastman
Browse files

Use texture cache for tabulated functions

parent 8689a536
......@@ -232,7 +232,7 @@ static const int GT2XX_RANDOM_THREADS_PER_BLOCK = 384;
static const int G8X_NONBOND_WORKUNITS_PER_SM = 220;
static const int GT2XX_NONBOND_WORKUNITS_PER_SM = 256;
static const unsigned int MAX_STACK_SIZE = 8;
static const unsigned int MAX_TABULATED_FUNCTIONS = 8;
static const unsigned int MAX_TABULATED_FUNCTIONS = 4;
static const float PI = 3.14159265358979323846f;
......
......@@ -597,6 +597,7 @@ void gpuSetTabulatedFunction(gpuContext gpu, int index, const string& name, cons
gpu->tabulatedFunctions[index].name = name;
gpu->tabulatedFunctions[index].min = min;
gpu->tabulatedFunctions[index].max = max;
gpu->tabulatedFunctionsChanged = true;
// First create a padded set of function values.
......
......@@ -43,6 +43,8 @@ struct gpuMoleculeGroup {
};
struct gpuTabulatedFunction {
gpuTabulatedFunction() : coefficients(NULL) {
}
std::string name;
double min, max;
CUDAStream<float4>* coefficients;
......@@ -75,7 +77,7 @@ struct _gpuContext {
std::vector<gpuMoleculeGroup> moleculeGroups;
gpuTabulatedFunction tabulatedFunctions[MAX_TABULATED_FUNCTIONS];
std::vector<int3> posCellOffsets;
float iterations;
int iterations;
float epsfac;
float solventDielectric;
float soluteDielectric;
......@@ -85,6 +87,7 @@ struct _gpuContext {
bool bRecalculateBornRadii;
bool bOutputBufferPerWarp;
bool bIncludeGBSA;
bool tabulatedFunctionsChanged;
unsigned long seed;
SM_VERSION sm_version;
compactionPlan compactPlan;
......
......@@ -55,6 +55,11 @@ static __constant__ Expression<128> energyExp;
static __constant__ Expression<64> combiningRules[4];
static __constant__ float globalParams[8];
texture<float4, 1, cudaReadModeElementType> texRef0;
texture<float4, 1, cudaReadModeElementType> texRef1;
texture<float4, 1, cudaReadModeElementType> texRef2;
texture<float4, 1, cudaReadModeElementType> texRef3;
void SetCalculateCustomNonbondedForcesSim(gpuContext gpu)
{
cudaError_t status;
......@@ -157,7 +162,15 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
else
{
int index = floor((x-params.x)*params.z);
float4 coeff = cSim.pTabulatedFunctionCoefficients[function][index];
float4 coeff;
if (function == 0)
coeff = tex1Dfetch(texRef0, index);
else if (function == 1)
coeff = tex1Dfetch(texRef1, index);
else if (function == 2)
coeff = tex1Dfetch(texRef2, index);
else
coeff = tex1Dfetch(texRef3, index);
x = (x-params.x)*params.z-index;
if (op == CUSTOM)
STACK(stackPointer) = coeff.x+x*(coeff.y+x*(coeff.z+x*coeff.w));
......@@ -263,135 +276,6 @@ __device__ float kEvaluateExpression_kernel(Expression<SIZE>* expression, float*
}
}
}
// switch (expression->op[i])
// {
// case CONSTANT:
// STACK(++stackPointer) = expression->arg[i];
// break;
// case VARIABLE0:
// STACK(++stackPointer) = var0;
// break;
// case VARIABLE1:
// STACK(++stackPointer) = vars1.x;
// break;
// case VARIABLE2:
// STACK(++stackPointer) = vars1.y;
// break;
// case VARIABLE3:
// STACK(++stackPointer) = vars1.z;
// break;
// case VARIABLE4:
// STACK(++stackPointer) = vars1.w;
// break;
// case VARIABLE5:
// STACK(++stackPointer) = vars2.x;
// break;
// case VARIABLE6:
// STACK(++stackPointer) = vars2.y;
// break;
// case VARIABLE7:
// STACK(++stackPointer) = vars2.z;
// break;
// case VARIABLE8:
// STACK(++stackPointer) = vars2.w;
// break;
// case GLOBAL:
// STACK(++stackPointer) = globalParams[(int) expression->arg[i]];
// break;
// case ADD:
// {
// float temp = STACK(stackPointer);
// STACK(--stackPointer) += temp;
// break;
// }
// case SUBTRACT:
// {
// float temp = STACK(stackPointer);
// STACK(stackPointer) = temp-STACK(--stackPointer);
// break;
// }
// case MULTIPLY:
// {
// float temp = STACK(stackPointer);
// STACK(--stackPointer) *= temp;
// break;
// }
// case DIVIDE:
// {
// float temp = STACK(stackPointer);
// STACK(stackPointer) = temp/STACK(--stackPointer);
// break;
// }
// case POWER:
// {
// float temp = STACK(stackPointer);
// STACK(stackPointer) = pow(temp, STACK(--stackPointer));
// break;
// }
// case NEGATE:
// STACK(stackPointer) *= -1.0f;
// break;
// case SQRT:
// STACK(stackPointer) = sqrt(STACK(stackPointer));
// break;
// case EXP:
// STACK(stackPointer) = exp(STACK(stackPointer));
// break;
// case LOG:
// STACK(stackPointer) = log(STACK(stackPointer));
// break;
// case SIN:
// STACK(stackPointer) = sin(STACK(stackPointer));
// break;
// case COS:
// STACK(stackPointer) = cos(STACK(stackPointer));
// break;
// case SEC:
// STACK(stackPointer) = 1.0f/cos(STACK(stackPointer));
// break;
// case CSC:
// STACK(stackPointer) = 1.0f/sin(STACK(stackPointer));
// break;
// case TAN:
// STACK(stackPointer) = tan(STACK(stackPointer));
// break;
// case COT:
// STACK(stackPointer) = 1.0f/tan(STACK(stackPointer));
// break;
// case ASIN:
// STACK(stackPointer) = asin(STACK(stackPointer));
// break;
// case ACOS:
// STACK(stackPointer) = acos(STACK(stackPointer));
// break;
// case ATAN:
// STACK(stackPointer) = atan(STACK(stackPointer));
// break;
// case SQUARE:
// {
// float temp = STACK(stackPointer);
// STACK(stackPointer) *= temp;
// break;
// }
// case CUBE:
// {
// float temp = STACK(stackPointer);
// STACK(stackPointer) *= temp*temp;
// break;
// }
// case RECIPROCAL:
// STACK(stackPointer) = 1.0f/STACK(stackPointer);
// break;
// case ADD_CONSTANT:
// STACK(stackPointer) += expression->arg[i];
// break;
// case MULTIPLY_CONSTANT:
// STACK(stackPointer) *= expression->arg[i];
// break;
// case POWER_CONSTANT:
// STACK(stackPointer) = pow(STACK(stackPointer), expression->arg[i]);
// break;
// }
}
return STACK(stackPointer);
}
......@@ -439,6 +323,19 @@ __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int* workU
void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
{
// printf("kCalculateCustomNonbondedCutoffForces\n");
if (gpu->tabulatedFunctionsChanged)
{
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
if (gpu->tabulatedFunctions[0].coefficients != NULL)
cudaBindTexture(NULL, &texRef0, gpu->tabulatedFunctions[0].coefficients->_pDevData, &channelDesc, gpu->tabulatedFunctions[0].coefficients->_length*sizeof(float4));
if (gpu->tabulatedFunctions[1].coefficients != NULL)
cudaBindTexture(NULL, &texRef1, gpu->tabulatedFunctions[1].coefficients->_pDevData, &channelDesc, gpu->tabulatedFunctions[1].coefficients->_length*sizeof(float4));
if (gpu->tabulatedFunctions[2].coefficients != NULL)
cudaBindTexture(NULL, &texRef2, gpu->tabulatedFunctions[2].coefficients->_pDevData, &channelDesc, gpu->tabulatedFunctions[2].coefficients->_length*sizeof(float4));
if (gpu->tabulatedFunctions[3].coefficients != NULL)
cudaBindTexture(NULL, &texRef3, gpu->tabulatedFunctions[3].coefficients->_pDevData, &channelDesc, gpu->tabulatedFunctions[3].coefficients->_length*sizeof(float4));
gpu->tabulatedFunctionsChanged = false;
}
int sharedPerThread = sizeof(Atom)+gpu->sim.customExpressionStackSize*sizeof(float);
if (gpu->sim.customNonbondedMethod != NO_CUTOFF)
sharedPerThread += sizeof(float3);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment