Added tapering function to AMOEBA vdw function; fixed issue w/ zeroing of vdw forces.

4e480ff6 · Mark Friedrichs · cb0b20f5 · 4e480ff6 · 4e480ff6 · 4e480ff6
Commit 4e480ff6 authored Apr 23, 2012 by Mark Friedrichs
4 changed files
--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
@@ -447,7 +447,12 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
    (void) fprintf( log, "     vdwSigmaCombiningRule              %d\n",      amoebaGpu->vdwSigmaCombiningRule);
    (void) fprintf( log, "     vdwEpsilonCombiningRule            %d\n",      amoebaGpu->vdwEpsilonCombiningRule);
    (void) fprintf( log, "     vdwUsePBC                          %d\n",      amoebaGpu->amoebaSim.vdwUsePBC);
+    (void) fprintf( log, "     vdwCutoff                          %15.7e\n",  amoebaGpu->amoebaSim.vdwCutoff);
    (void) fprintf( log, "     vdwCutoff2                         %15.7e\n",  amoebaGpu->amoebaSim.vdwCutoff2);
+    (void) fprintf( log, "     vdwTaperCutoff                     %15.7e\n",  amoebaGpu->amoebaSim.vdwTaperCutoff );
+    if( amoebaGpu->amoebaSim.vdwCutoff > 0.0f ){
+        (void) fprintf( log, "     vdwTaper                           %8.3f\n",  amoebaGpu->amoebaSim.vdwTaperCutoff/amoebaGpu->amoebaSim.vdwCutoff);
+    }
    totalMemory += gpuPrintCudaStreamFloat2( amoebaGpu->psVdwSigmaEpsilon, log );
    totalMemory += gpuPrintCudaStreamInt( amoebaGpu->psAmoebaVdwNonReductionID, log );
    totalMemory += gpuPrintCudaStreamInt4( amoebaGpu->psAmoebaVdwReductionID, log );
@@ -2243,6 +2248,84 @@ static int decodeCell( int cellCode, unsigned int* x, unsigned int* y, unsigned
 	return 0;
 }
+static void getVdwTaper( double r, double vdwTaperCoefficients[6], double* taper, double* dtaper ){
+       double r2 = r*r;
+       *taper    = r*( r2*(vdwTaperCoefficients[5]*r2 +
+                           vdwTaperCoefficients[3])   +
+                           vdwTaperCoefficients[1])   +
+                     ( r2*(vdwTaperCoefficients[4]*r2   +
+                           vdwTaperCoefficients[2])   +
+                           vdwTaperCoefficients[0]);
+       *dtaper   = ( r2*(5.0*vdwTaperCoefficients[5]*r2   + 3.0*vdwTaperCoefficients[3]) + vdwTaperCoefficients[1]) +
+                      r*(4.0*vdwTaperCoefficients[4]*r2   + 2.0*vdwTaperCoefficients[2]);
+}
+static void lookupVdwTaperLinear( float r, double vdwTaperCut, double delta,
+                                  std::vector<float>& taperTable,  std::vector<float>& dtaperTable, float* taper, float* dtaper ){
+    //int index        = static_cast<int>(floor( (r - vdwTaperCut)/delta));
+    int index        = static_cast<int>(round( (r - vdwTaperCut)/delta));
+    if( index > taperTable.size()-1 ){
+        *taper = *dtaper = 0.0f;
+    } else {
+        float slope      = (taperTable[index+1] - taperTable[index])/delta;
+        float intercept  = taperTable[index+1] - slope*(delta*static_cast<float>(index+1));
+        *taper           = slope*(r-vdwTaperCut) + intercept; 
+        slope            = (dtaperTable[index+1] - dtaperTable[index])/delta;
+        intercept        = dtaperTable[index+1] - slope*(delta*static_cast<float>(index+1));
+        *dtaper          = slope*(r-vdwTaperCut) + intercept; 
+//fprintf( stderr, "TaperTest: %3d r=%15.7e m=%15.7e b=%15.7e taper=%15.7e %15.7e\n", index, r, slope, intercept, *taper, *dtaper );
+    }
+}
+static void lookupVdwTaper( float r, double vdwTaperCut, double delta,
+                            std::vector<float>& taperTable,  std::vector<float>& dtaperTable, float* taper, float* dtaper ){
+    int index        = static_cast<int>(round( (r - vdwTaperCut)/delta));
+    if( index > taperTable.size()-2 ){
+        *taper = *dtaper = 0.0f;
+    } else if( index == 0){
+        *taper  = 1.0f;
+        *dtaper = 0.0f;
+    } else {
+        float x          = r-vdwTaperCut;
+        float x0         = delta*static_cast<float>(index-1);
+        float y0         = taperTable[index-1];
+        float x1         = x0 + delta;
+        float y1         = taperTable[index];
+        float x2         = x1 + delta;
+        float y2         = taperTable[index+1];
+        *taper           = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) + 
+                           y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) + 
+                           y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
+              y0         = dtaperTable[index-1];
+              y1         = dtaperTable[index];
+              y2         = dtaperTable[index+1];
+        *dtaper          = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) + 
+                           y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) + 
+                           y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
+//fprintf( stderr, "TaperTest: %3d r=%15.7e x=%15.7e x0=%15.7e x1=%15.7e x2=%15.7e  V=%15.7e %15.7e\n", 
+//         index, r, x, x0, x1, x2, *taper, *dtaper );
+    } 
+}
 extern "C"
 void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
                                const std::vector<int>& indexIVs, 
@@ -2265,9 +2348,86 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
    unsigned int particles                 = sigmas.size();
    amoebaGpu->amoebaSim.vdwUsePBC         = usePBC;
+    amoebaGpu->amoebaSim.vdwCutoff         = cutoff;
    amoebaGpu->amoebaSim.vdwCutoff2        = cutoff*cutoff;
+    double vdwTaper                         = 0.90f;
+    if( vdwTaper < 1.0 ){
+        double vdwTaperCoefficients[6];
+        double vdwCut       = cutoff;
+        double vdwTaperCut  = vdwTaper*cutoff;
+        amoebaGpu->amoebaSim.vdwTaperCutoff  = vdwTaperCut;
+        amoebaGpu->amoebaSim.vdwTaperCutoff2 = vdwTaperCut*vdwTaperCut;
+        double vdwCut2      = vdwCut*vdwCut;
+        double vdwCut3      = vdwCut2*vdwCut;
+        double vdwCut4      = vdwCut2*vdwCut2;
+        double vdwCut5      = vdwCut2*vdwCut3;
+        double vdwCut6      = vdwCut3*vdwCut3;
+        double vdwCut7      = vdwCut3*vdwCut4;
+        double vdwTaperCut2 = vdwTaperCut*vdwTaperCut;
+        double vdwTaperCut3 = vdwTaperCut2*vdwTaperCut;
+        double vdwTaperCut4 = vdwTaperCut2*vdwTaperCut2;
+        double vdwTaperCut5 = vdwTaperCut2*vdwTaperCut3;
+        double vdwTaperCut6 = vdwTaperCut3*vdwTaperCut3;
+        double vdwTaperCut7 = vdwTaperCut3*vdwTaperCut4;
+        // get 5th degree multiplicative switching function coefficients;
+       double denom  = 1.0/(vdwCut-vdwTaperCut);
+       double denom2 = denom*denom;
+       denom         = denom*denom2*denom2;
+       vdwTaperCoefficients[0] = vdwCut*vdwCut2 * (vdwCut2-5.0*vdwCut*vdwTaperCut+10.0*vdwTaperCut2)*denom;
+       vdwTaperCoefficients[1] = -30.0*vdwCut2*vdwTaperCut2*denom;
+       vdwTaperCoefficients[2] =  30.0*(vdwCut2*vdwTaperCut+vdwCut*vdwTaperCut2)*denom;
+       vdwTaperCoefficients[3] = -10.0*(vdwCut2+4.0*vdwCut*vdwTaperCut+vdwTaperCut2)*denom;
+       vdwTaperCoefficients[4] =  15.0*(vdwCut+vdwTaperCut)*denom;
+       vdwTaperCoefficients[5] =  -6.0*denom;
+       // table
+       int vdwTableSize      = VDW_TAPER_TABLE_SIZE;
+       double tableDelta     = vdwCut - vdwTaperCut;
+       double tableIncrement = tableDelta/static_cast<double>(vdwTableSize-1);
+       amoebaGpu->amoebaSim.vdwTaperDelta = static_cast<float>(tableIncrement);
+       std::vector<float> taper;
+       std::vector<float> dtaper;
+       for( unsigned int ii = 0; ii < vdwTableSize; ii ++ ){
+           double r = vdwTaperCut + static_cast<double>(ii)*tableIncrement;
+           double taperValue, dtaperValue;
+           getVdwTaper( r, vdwTaperCoefficients, &taperValue, &dtaperValue );
+           taper.push_back(  static_cast<float>(taperValue) );
+           dtaper.push_back( static_cast<float>(dtaperValue) );
+           amoebaGpu->amoebaSim.vdwTaperTable[ii]   = taperValue;
+           amoebaGpu->amoebaSim.vdw_dTaperTable[ii] = dtaperValue;
+           //fprintf( stderr, "Taper: %3d %15.7e  %15.7e %15.7e\n", ii, r, taperValue, dtaperValue );
+       }
+       amoebaGpu->amoebaSim.vdwTaperTable[VDW_TAPER_TABLE_SIZE]   = 0.0f;
+       amoebaGpu->amoebaSim.vdw_dTaperTable[VDW_TAPER_TABLE_SIZE] = 0.0f;
+       if( 0 ){
+           for( unsigned int ii = 0; ii < vdwTableSize; ii ++ ){
+               float r = vdwTaperCut + (static_cast<double>(ii)+0.5)*tableIncrement;
+               float taperValue, dtaperValue;
+               float taperValueL, dtaperValueL;
+               double taperValueD, dtaperValueD;
+               lookupVdwTaperLinear( r, vdwTaperCut, tableIncrement, taper, dtaper, &taperValueL, &dtaperValueL );
+               lookupVdwTaper( r, vdwTaperCut, tableIncrement, taper, dtaper, &taperValue, &dtaperValue );
+               getVdwTaper( static_cast<double>(r), vdwTaperCoefficients, &taperValueD, &dtaperValueD );
+               double diffTaperL = fabs( (taperValueL-taperValueD )/taperValueD);
+               double diffdTaperL= fabs( (dtaperValueL-dtaperValueD )/dtaperValueD);
+               double diffTaper  = fabs( (taperValue-taperValueD )/taperValueD);
+               double diffdTaper = fabs( (dtaperValue-dtaperValueD )/dtaperValueD);
+    fprintf( stderr, "TT: %3d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e    %15.7e %15.7e %15.7e %15.7e %15.7e\n", 
+                   ii, r, diffTaper, diffTaperL, taperValueL, taperValue, taperValueD, diffdTaper, diffdTaperL, dtaperValueL, dtaperValue, dtaperValueD );
+           }
+       }
+    }
-    // set sigma combining rule flag
+    //  sigma combining rule flag
    if( vdwSigmaCombiningRule.compare( "ARITHMETIC" ) == 0 ){
        amoebaGpu->vdwSigmaCombiningRule = 1;
@@ -2832,6 +2992,7 @@ void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
                                          const float totalMaxWcaDispersionEnergy,
                                          const float epso, const float epsh, const float rmino, const float rminh,
                                          const float awater, const float shctd, const float dispoff )
 {
   // ---------------------------------------------------------------------------------------

--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaTypes.h
@@ -164,7 +164,16 @@ struct cudaAmoebaGmxSimulation {
    float* pWorkArray_1_2;
    int vdwUsePBC;
+    float vdwCutoff;
    float vdwCutoff2;
+    float vdwTaperCutoff;
+    float vdwTaperCutoff2;
+    float vdwTaperDelta;
+#define VDW_TAPER_TABLE_SIZE 100
+    float vdwTaperTable[VDW_TAPER_TABLE_SIZE+1];
+    float vdw_dTaperTable[VDW_TAPER_TABLE_SIZE+1];
    unsigned int amoebaVdwNonReductions;
    int* pAmoebaVdwNonReductionID;
    unsigned int* pVdwWorkUnit;

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
@@ -116,6 +116,67 @@ __device__ void getVdw14_7CombindedSigmaEpsilon_kernel( int sigmaCombiningRule,
 }
+// lookup table w/ linear interpolation
+__device__ void lookupLinearVdwTaper( float r, float* taper, float* dtaper )
+{
+    if( r > (cAmoebaSim.vdwCutoff - cAmoebaSim.vdwTaperDelta) ){
+        *taper = *dtaper = 0.0f;
+    } else {
+        int index        = (int) (floor( (r - cAmoebaSim.vdwTaperCutoff)/cAmoebaSim.vdwTaperDelta));
+//        int index        = (int) round( (r - cAmoebaSim.vdwTaperCutoff)/cAmoebaSim.vdwTaperDelta);
+        float slope      = (cAmoebaSim.vdwTaperTable[index+1] - cAmoebaSim.vdwTaperTable[index])/cAmoebaSim.vdwTaperDelta;
+        float intercept  =  cAmoebaSim.vdwTaperTable[index+1] - slope*(cAmoebaSim.vdwTaperDelta*static_cast<float>(index+1));
+        *taper           = slope*(r-cAmoebaSim.vdwTaperCutoff) + intercept; 
+        slope            = (cAmoebaSim.vdw_dTaperTable[index+1] - cAmoebaSim.vdw_dTaperTable[index])/cAmoebaSim.vdwTaperDelta;
+        intercept        =  cAmoebaSim.vdw_dTaperTable[index+1]  - slope*(cAmoebaSim.vdwTaperDelta*static_cast<float>(index+1));
+        *dtaper          = slope*(r-cAmoebaSim.vdwTaperCutoff) + intercept; 
+    }
+}
+// lookup table w/ quadratic interpolation
+__device__ void lookupVdwTaper( float r, float* taper, float* dtaper )
+{
+    if( r > (cAmoebaSim.vdwCutoff - 2.0f*cAmoebaSim.vdwTaperDelta) ){
+        *taper = *dtaper = 0.0f;
+    } else {
+        float x          = r - cAmoebaSim.vdwTaperCutoff;
+//        int index        = (int) (floor(x)/cAmoebaSim.vdwTaperDelta);
+        int index        = (int) round(x/cAmoebaSim.vdwTaperDelta);
+        if( index ){
+            float x0         = cAmoebaSim.vdwTaperDelta*static_cast<float>(index-1);
+            float y0         = cAmoebaSim.vdwTaperTable[index-1];
+            float x1         = x0 + cAmoebaSim.vdwTaperDelta;
+            float y1         = cAmoebaSim.vdwTaperTable[index];
+            float x2         = x1 + cAmoebaSim.vdwTaperDelta;
+            float y2         = cAmoebaSim.vdwTaperTable[index+1];
+            *taper           = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) + 
+                               y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) + 
+                               y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
+                  y0         = cAmoebaSim.vdw_dTaperTable[index-1];
+                  y1         = cAmoebaSim.vdw_dTaperTable[index];
+                  y2         = cAmoebaSim.vdw_dTaperTable[index+1];
+            *dtaper          = y0*( (x-x1)*(x-x2)/((x0-x1)*(x0-x2))) + 
+                               y1*( (x-x0)*(x-x2)/((x1-x0)*(x1-x2))) + 
+                               y2*( (x-x0)*(x-x1)/((x2-x0)*(x2-x1)));
+        } else {
+            *taper  = 1.0f;
+            *dtaper = 0.0f;
+        }
+    }
+}
 __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma,    float combindedEpsilon,
                                                float force[3], float* energy)
 {
@@ -156,10 +217,20 @@ __device__ void calculateVdw14_7PairIxn_kernel( float combindedSigma,    float c
    *energy                                      = combindedEpsilon*combindedSigma7*tau7*( (combindedSigma7*gammaHal*rhoInverse) - 2.0f);
    float deltaE                                 = (-7.0f*(dTau*(*energy) + gTau))*rI;
+    if( r > cAmoebaSim.vdwTaperCutoff ){ 
+        float taper, dtaper;
+        lookupVdwTaper( r, &taper, &dtaper );
+        //lookupLinearVdwTaper( r, &taper, &dtaper );
+        deltaE   = (*energy)*dtaper + deltaE*taper;
+        *energy *= taper;
+    }
    force[0]                                    *= deltaE;
    force[1]                                    *= deltaE;
    force[2]                                    *= deltaE;
 }
 // perform reduction of force on H's and add to heavy atom partner
@@ -512,6 +583,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(Vdw14_7Particle), gpu->sharedMemoryPerBlock ), maxThreads);
    }    
    kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates );
    kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates );

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
@@ -162,15 +162,15 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
            }
+            // zero shared fields
+            zeroVdw14_7SharedForce( &(sA[threadIdx.x]) );
 #ifdef USE_CUTOFF
            unsigned int flags = cSim.pInteractionFlag[pos];
            if (flags == 0) {
            } else {
 #endif
-               // zero shared fields
-                zeroVdw14_7SharedForce( &(sA[threadIdx.x]) );
                if( bExclusionFlag )
                {