Commit 4e480ff6 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Added tapering function to AMOEBA vdw function; fixed issue w/ zeroing of vdw forces.

parent cb0b20f5
......@@ -447,7 +447,12 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, " vdwSigmaCombiningRule %d\n", amoebaGpu->vdwSigmaCombiningRule);
(void) fprintf( log, " vdwEpsilonCombiningRule %d\n", amoebaGpu->vdwEpsilonCombiningRule);
(void) fprintf( log, " vdwUsePBC %d\n", amoebaGpu->amoebaSim.vdwUsePBC);
(void) fprintf( log, " vdwCutoff %15.7e\n", amoebaGpu->amoebaSim.vdwCutoff);
(void) fprintf( log, " vdwCutoff2 %15.7e\n", amoebaGpu->amoebaSim.vdwCutoff2);
(void) fprintf( log, " vdwTaperCutoff %15.7e\n", amoebaGpu->amoebaSim.vdwTaperCutoff );
if( amoebaGpu->amoebaSim.vdwCutoff > 0.0f ){
(void) fprintf( log, " vdwTaper %8.3f\n", amoebaGpu->amoebaSim.vdwTaperCutoff/amoebaGpu->amoebaSim.vdwCutoff);
}
totalMemory += gpuPrintCudaStreamFloat2( amoebaGpu->psVdwSigmaEpsilon, log );
totalMemory += gpuPrintCudaStreamInt( amoebaGpu->psAmoebaVdwNonReductionID, log );
totalMemory += gpuPrintCudaStreamInt4( amoebaGpu->psAmoebaVdwReductionID, log );
......@@ -2243,6 +2248,84 @@ static int decodeCell( int cellCode, unsigned int* x, unsigned int* y, unsigned
return 0;
}
static void getVdwTaper( double r, double vdwTaperCoefficients[6], double* taper, double* dtaper ){
double r2 = r*r;
*taper = r*( r2*(vdwTaperCoefficients[5]*r2 +
vdwTaperCoefficients[3]) +
vdwTaperCoefficients[1]) +
( r2*(vdwTaperCoefficients[4]*r2 +
vdwTaperCoefficients[2]) +
vdwTaperCoefficients[0]);
*dtaper = ( r2*(5.0*vdwTaperCoefficients[5]*r2 + 3.0*vdwTaperCoefficients[3]) + vdwTaperCoefficients[1]) +
r*(4.0*vdwTaperCoefficients[4]*r2 + 2.0*vdwTaperCoefficients[2]);
}
static void lookupVdwTaperLinear( float r, double vdwTaperCut, double delta,
std::vector<float>& taperTable, std::vector<float>& dtaperTable, float* taper, float* dtaper ){
//int index = static_cast<int>(floor( (r - vdwTaperCut)/delta));
int index = static_cast<int>(round( (r - vdwTaperCut)/delta));
if( index > taperTable.size()-1 ){
*taper = *dtaper = 0.0f;
} else {
float slope = (taperTable[index+1] - taperTable[index])/delta;
float intercept = taperTable[index+1] - slope*(delta*static_cast<float>(index+1));
*taper = slope*(r-vdwTaperCut) + intercept;
slope = (dtaperTable[index+1] - dtaperTable[index])/delta;
intercept = dtaperTable[index+1] - slope*(delta*static_cast<float>(index+1));
*dtaper = slope*(r-vdwTaperCut) + intercept;
//fprintf( stderr, "TaperTest: %3d r=%15.7e m=%15.7e b=%15.7e taper=%15.7e %15.7e\n", index, r, slope, intercept, *taper, *dtaper );
}
}
static void lookupVdwTaper( float r, double vdwTaperCut, double delta,
std::vector<float>& taperTable, std::vector<float>& dtaperTable, float* taper, float* dtaper ){
int index = static_cast<int>(round( (r - vdwTaperCut)/delta));
if( index > taperTable.size()-2 ){
*taper = *dtaper = 0.0f;
} else if( index == 0){
*taper = 1.0f;
*dtaper = 0.0f;
} else {
float x = r-vdwTaperCut;
float x0 = delta*static_cast<float>(index-1);
float y0 = taperTable[index-1];
float x1 = x0 + delta;
float y1 = taperTable[index];
float x2 = x1 + delta;
float y2 = taperTable[index+1];
*taper = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) +
y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) +
y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
y0 = dtaperTable[index-1];
y1 = dtaperTable[index];
y2 = dtaperTable[index+1];
*dtaper = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) +
y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) +
y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
//fprintf( stderr, "TaperTest: %3d r=%15.7e x=%15.7e x0=%15.7e x1=%15.7e x2=%15.7e V=%15.7e %15.7e\n",
// index, r, x, x0, x1, x2, *taper, *dtaper );
}
}
extern "C"
void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
const std::vector<int>& indexIVs,
......@@ -2265,9 +2348,86 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
unsigned int particles = sigmas.size();
amoebaGpu->amoebaSim.vdwUsePBC = usePBC;
amoebaGpu->amoebaSim.vdwCutoff = cutoff;
amoebaGpu->amoebaSim.vdwCutoff2 = cutoff*cutoff;
double vdwTaper = 0.90f;
if( vdwTaper < 1.0 ){
double vdwTaperCoefficients[6];
double vdwCut = cutoff;
double vdwTaperCut = vdwTaper*cutoff;
amoebaGpu->amoebaSim.vdwTaperCutoff = vdwTaperCut;
amoebaGpu->amoebaSim.vdwTaperCutoff2 = vdwTaperCut*vdwTaperCut;
double vdwCut2 = vdwCut*vdwCut;
double vdwCut3 = vdwCut2*vdwCut;
double vdwCut4 = vdwCut2*vdwCut2;
double vdwCut5 = vdwCut2*vdwCut3;
double vdwCut6 = vdwCut3*vdwCut3;
double vdwCut7 = vdwCut3*vdwCut4;
double vdwTaperCut2 = vdwTaperCut*vdwTaperCut;
double vdwTaperCut3 = vdwTaperCut2*vdwTaperCut;
double vdwTaperCut4 = vdwTaperCut2*vdwTaperCut2;
double vdwTaperCut5 = vdwTaperCut2*vdwTaperCut3;
double vdwTaperCut6 = vdwTaperCut3*vdwTaperCut3;
double vdwTaperCut7 = vdwTaperCut3*vdwTaperCut4;
// get 5th degree multiplicative switching function coefficients;
double denom = 1.0/(vdwCut-vdwTaperCut);
double denom2 = denom*denom;
denom = denom*denom2*denom2;
vdwTaperCoefficients[0] = vdwCut*vdwCut2 * (vdwCut2-5.0*vdwCut*vdwTaperCut+10.0*vdwTaperCut2)*denom;
vdwTaperCoefficients[1] = -30.0*vdwCut2*vdwTaperCut2*denom;
vdwTaperCoefficients[2] = 30.0*(vdwCut2*vdwTaperCut+vdwCut*vdwTaperCut2)*denom;
vdwTaperCoefficients[3] = -10.0*(vdwCut2+4.0*vdwCut*vdwTaperCut+vdwTaperCut2)*denom;
vdwTaperCoefficients[4] = 15.0*(vdwCut+vdwTaperCut)*denom;
vdwTaperCoefficients[5] = -6.0*denom;
// table
int vdwTableSize = VDW_TAPER_TABLE_SIZE;
double tableDelta = vdwCut - vdwTaperCut;
double tableIncrement = tableDelta/static_cast<double>(vdwTableSize-1);
amoebaGpu->amoebaSim.vdwTaperDelta = static_cast<float>(tableIncrement);
std::vector<float> taper;
std::vector<float> dtaper;
for( unsigned int ii = 0; ii < vdwTableSize; ii ++ ){
double r = vdwTaperCut + static_cast<double>(ii)*tableIncrement;
double taperValue, dtaperValue;
getVdwTaper( r, vdwTaperCoefficients, &taperValue, &dtaperValue );
taper.push_back( static_cast<float>(taperValue) );
dtaper.push_back( static_cast<float>(dtaperValue) );
amoebaGpu->amoebaSim.vdwTaperTable[ii] = taperValue;
amoebaGpu->amoebaSim.vdw_dTaperTable[ii] = dtaperValue;
//fprintf( stderr, "Taper: %3d %15.7e %15.7e %15.7e\n", ii, r, taperValue, dtaperValue );
}
amoebaGpu->amoebaSim.vdwTaperTable[VDW_TAPER_TABLE_SIZE] = 0.0f;
amoebaGpu->amoebaSim.vdw_dTaperTable[VDW_TAPER_TABLE_SIZE] = 0.0f;
if( 0 ){
for( unsigned int ii = 0; ii < vdwTableSize; ii ++ ){
float r = vdwTaperCut + (static_cast<double>(ii)+0.5)*tableIncrement;
float taperValue, dtaperValue;
float taperValueL, dtaperValueL;
double taperValueD, dtaperValueD;
lookupVdwTaperLinear( r, vdwTaperCut, tableIncrement, taper, dtaper, &taperValueL, &dtaperValueL );
lookupVdwTaper( r, vdwTaperCut, tableIncrement, taper, dtaper, &taperValue, &dtaperValue );
getVdwTaper( static_cast<double>(r), vdwTaperCoefficients, &taperValueD, &dtaperValueD );
double diffTaperL = fabs( (taperValueL-taperValueD )/taperValueD);
double diffdTaperL= fabs( (dtaperValueL-dtaperValueD )/dtaperValueD);
double diffTaper = fabs( (taperValue-taperValueD )/taperValueD);
double diffdTaper = fabs( (dtaperValue-dtaperValueD )/dtaperValueD);
fprintf( stderr, "TT: %3d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n",
ii, r, diffTaper, diffTaperL, taperValueL, taperValue, taperValueD, diffdTaper, diffdTaperL, dtaperValueL, dtaperValue, dtaperValueD );
}
}
}
// set sigma combining rule flag
// sigma combining rule flag
if( vdwSigmaCombiningRule.compare( "ARITHMETIC" ) == 0 ){
amoebaGpu->vdwSigmaCombiningRule = 1;
......@@ -2832,6 +2992,7 @@ void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
const float totalMaxWcaDispersionEnergy,
const float epso, const float epsh, const float rmino, const float rminh,
const float awater, const float shctd, const float dispoff )
{
// ---------------------------------------------------------------------------------------
......
......@@ -164,7 +164,16 @@ struct cudaAmoebaGmxSimulation {
float* pWorkArray_1_2;
int vdwUsePBC;
float vdwCutoff;
float vdwCutoff2;
float vdwTaperCutoff;
float vdwTaperCutoff2;
float vdwTaperDelta;
#define VDW_TAPER_TABLE_SIZE 100
float vdwTaperTable[VDW_TAPER_TABLE_SIZE+1];
float vdw_dTaperTable[VDW_TAPER_TABLE_SIZE+1];
unsigned int amoebaVdwNonReductions;
int* pAmoebaVdwNonReductionID;
unsigned int* pVdwWorkUnit;
......
......@@ -116,6 +116,67 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
}
// lookup table w/ linear interpolation
__device__ void lookupLinearVdwTaper( float r, float* taper, float* dtaper )
{
if( r > (cAmoebaSim.vdwCutoff - cAmoebaSim.vdwTaperDelta) ){
*taper = *dtaper = 0.0f;
} else {
int index = (int) (floor( (r - cAmoebaSim.vdwTaperCutoff)/cAmoebaSim.vdwTaperDelta));
// int index = (int) round( (r - cAmoebaSim.vdwTaperCutoff)/cAmoebaSim.vdwTaperDelta);
float slope = (cAmoebaSim.vdwTaperTable[index+1] - cAmoebaSim.vdwTaperTable[index])/cAmoebaSim.vdwTaperDelta;
float intercept = cAmoebaSim.vdwTaperTable[index+1] - slope*(cAmoebaSim.vdwTaperDelta*static_cast<float>(index+1));
*taper = slope*(r-cAmoebaSim.vdwTaperCutoff) + intercept;
slope = (cAmoebaSim.vdw_dTaperTable[index+1] - cAmoebaSim.vdw_dTaperTable[index])/cAmoebaSim.vdwTaperDelta;
intercept = cAmoebaSim.vdw_dTaperTable[index+1] - slope*(cAmoebaSim.vdwTaperDelta*static_cast<float>(index+1));
*dtaper = slope*(r-cAmoebaSim.vdwTaperCutoff) + intercept;
}
}
// lookup table w/ quadratic interpolation
__device__ void lookupVdwTaper( float r, float* taper, float* dtaper )
{
if( r > (cAmoebaSim.vdwCutoff - 2.0f*cAmoebaSim.vdwTaperDelta) ){
*taper = *dtaper = 0.0f;
} else {
float x = r - cAmoebaSim.vdwTaperCutoff;
// int index = (int) (floor(x)/cAmoebaSim.vdwTaperDelta);
int index = (int) round(x/cAmoebaSim.vdwTaperDelta);
if( index ){
float x0 = cAmoebaSim.vdwTaperDelta*static_cast<float>(index-1);
float y0 = cAmoebaSim.vdwTaperTable[index-1];
float x1 = x0 + cAmoebaSim.vdwTaperDelta;
float y1 = cAmoebaSim.vdwTaperTable[index];
float x2 = x1 + cAmoebaSim.vdwTaperDelta;
float y2 = cAmoebaSim.vdwTaperTable[index+1];
*taper = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) +
y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) +
y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
y0 = cAmoebaSim.vdw_dTaperTable[index-1];
y1 = cAmoebaSim.vdw_dTaperTable[index];
y2 = cAmoebaSim.vdw_dTaperTable[index+1];
*dtaper = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) +
y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) +
y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
} else {
*taper = 1.0f;
*dtaper = 0.0f;
}
}
}
__device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float combindedEpsilon,
float force[3], float* energy)
{
......@@ -156,10 +217,20 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma, float c
*energy = combindedEpsilon*combindedSigma7*tau7*( (combindedSigma7*gammaHal*rhoInverse) - 2.0f);
float deltaE = (-7.0f*(dTau*(*energy) + gTau))*rI;
if( r > cAmoebaSim.vdwTaperCutoff ){
float taper, dtaper;
lookupVdwTaper( r, &taper, &dtaper );
//lookupLinearVdwTaper( r, &taper, &dtaper );
deltaE = (*energy)*dtaper + deltaE*taper;
*energy *= taper;
}
force[0] *= deltaE;
force[1] *= deltaE;
force[2] *= deltaE;
}
// perform reduction of force on H's and add to heavy atom partner
......@@ -512,6 +583,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(Vdw14_7Particle), gpu->sharedMemoryPerBlock ), maxThreads);
}
kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates );
kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates );
......
......@@ -162,15 +162,15 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
}
// zero shared fields
zeroVdw14_7SharedForce( &(sA[threadIdx.x]) );
#ifdef USE_CUTOFF
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0) {
} else {
#endif
// zero shared fields
zeroVdw14_7SharedForce( &(sA[threadIdx.x]) );
if( bExclusionFlag )
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment