PME mods

6b0ad778 · Mark Friedrichs · f21e5169 · 6b0ad778 · 6b0ad778 · 6b0ad778
Commit 6b0ad778 authored Oct 01, 2010 by Mark Friedrichs
6 changed files
--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
@@ -98,7 +98,7 @@ __device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
 */
 // self-energy for PME
-/*
 __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, float* energy)
 {
    float term   = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
@@ -122,7 +122,7 @@ __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDir
    *energy      = (cii + term*(dii/3.0f + 2.0f*term*qii/5.0f));
    *energy     += term*uii/3.0f;
    *energy     *= fterm;
-} */
+}
 // self-torque for PME
@@ -134,14 +134,13 @@ __device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDir
    float uiy        = 0.5f*(atomI.inducedDipole[1] + atomI.inducedDipoleP[1]);
    float uiz        = 0.5f*(atomI.inducedDipole[2] + atomI.inducedDipoleP[2]);
-    atomI.torque[0] += term*(atomI.labFrameDipole[1]*uiz - atomI.labFrameDipole[2]*uiy);
+    atomI.torque[0] -= term*(atomI.labFrameDipole[1]*uiz - atomI.labFrameDipole[2]*uiy);
-    atomI.torque[1] += term*(atomI.labFrameDipole[2]*uix - atomI.labFrameDipole[0]*uiz);
+    atomI.torque[1] -= term*(atomI.labFrameDipole[2]*uix - atomI.labFrameDipole[0]*uiz);
-    atomI.torque[2] += term*(atomI.labFrameDipole[0]*uiy - atomI.labFrameDipole[1]*uix);
+    atomI.torque[2] -= term*(atomI.labFrameDipole[0]*uiy - atomI.labFrameDipole[1]*uix);
 }
 __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI,   PmeDirectElectrostaticParticle& atomJ,
-                                                               float* scalingFactors, float*  outputForce, float  outputTorque[2][3],
+                                                               float* scalingFactors, float*  outputForce, float  outputTorque[2][3], float* energy
-                                                               float* energy
 #ifdef AMOEBA_DEBUG
                                                               ,float4* debugArray 
 #endif
@@ -870,18 +869,18 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
        // increment gradient due to force and torque on first site;
-        outputForce[0]           = -conversionFactor*(ftm2[1] + ftm2i[1]);
+        outputForce[0]           = conversionFactor*(ftm2[1] + ftm2i[1]);
-        outputForce[1]           = -conversionFactor*(ftm2[2] + ftm2i[2]);
+        outputForce[1]           = conversionFactor*(ftm2[2] + ftm2i[2]);
-        outputForce[2]           = -conversionFactor*(ftm2[3] + ftm2i[3]);
+        outputForce[2]           = conversionFactor*(ftm2[3] + ftm2i[3]);
-        outputTorque[0][0]       = conversionFactor*(ttm2[1] + ttm2i[1]);
+        conversionFactor        *= -1.0;
-        outputTorque[0][1]       = conversionFactor*(ttm2[2] + ttm2i[2]);
+        outputTorque[0][0]       =  conversionFactor*(ttm2[1] + ttm2i[1]);
-        outputTorque[0][2]       = conversionFactor*(ttm2[3] + ttm2i[3]);
+        outputTorque[0][1]       =  conversionFactor*(ttm2[2] + ttm2i[2]);
+        outputTorque[0][2]       =  conversionFactor*(ttm2[3] + ttm2i[3]);
-        outputTorque[1][0]       = conversionFactor*(ttm3[1] + ttm3i[1]);
+        outputTorque[1][0]       =  conversionFactor*(ttm3[1] + ttm3i[1]);
-        outputTorque[1][1]       = conversionFactor*(ttm3[2] + ttm3i[2]);
+        outputTorque[1][1]       =  conversionFactor*(ttm3[2] + ttm3i[2]);
-        outputTorque[1][2]       = conversionFactor*(ttm3[3] + ttm3i[3]);
+        outputTorque[1][2]       =  conversionFactor*(ttm3[3] + ttm3i[3]);
-        //outputTorque[1][2]    = conversionFactor*(ttm3_2 + ttm3i_2);
 #ifdef AMOEBA_DEBUG
    int debugIndex               = 0;
@@ -1299,10 +1298,10 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
    }   
    delete debugArray;
 #endif
-   // ---------------------------------------------------------------------------------------
+    cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpu, amoebaGpu->psTorque, amoebaGpu->psForce, gpu->psForce4 );
 }
 /**---------------------------------------------------------------------------------------
@@ -1315,7 +1314,6 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
 void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
 {
    cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu );
    kCalculateAmoebaPME( amoebaGpu );
 } 

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
@@ -79,13 +79,13 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
        PmeDirectElectrostaticParticle localParticle;
        loadPmeDirectElectrostaticShared(&localParticle, atomI );
-        localParticle.force[0]                   = 0.0f;
+        localParticle.force[0]        = 0.0f;
-        localParticle.force[1]                   = 0.0f;
+        localParticle.force[1]        = 0.0f;
-        localParticle.force[2]                   = 0.0f;
+        localParticle.force[2]        = 0.0f;
-        localParticle.torque[0]                  = 0.0f;
+        localParticle.torque[0]       = 0.0f;
-        localParticle.torque[1]                  = 0.0f;
+        localParticle.torque[1]       = 0.0f;
-        localParticle.torque[2]                  = 0.0f;
+        localParticle.torque[2]       = 0.0f;
        scalingFactors[PScaleIndex]   = 1.0f;
        scalingFactors[DScaleIndex]   = 1.0f;
@@ -236,9 +236,9 @@ if( atomI == targetAtom ){
            if( atomI < cAmoebaSim.numberOfAtoms ){
                calculatePmeSelfTorqueElectrostaticPairIxn_kernel( localParticle );
-                //float energy;
+                float energy;
-                //calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy );
+                calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy );
-                //totalEnergy += energy;
+                totalEnergy += energy;
            }
            // Write results
@@ -282,10 +282,8 @@ if( atomI == targetAtom ){
 #endif
        }
-        else        // 100% utilization
+        else
        {
-            // Read fixed atom data into registers and GRF
            if (lasty != y)
            {
                // load shared data

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
@@ -382,10 +382,11 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
   --------------------------------------------------------------------------------------- */
+#ifdef AMOEBA_DEBUG
 static int isNanOrInfinity( double number ){
    return (number != number || number == std::numeric_limits<double>::infinity() || number == -std::numeric_limits<double>::infinity()) ? 1 : 0; 
 }
+#endif
 /**---------------------------------------------------------------------------------------
@@ -398,13 +399,9 @@ static int isNanOrInfinity( double number ){
 static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
 {
-   // ---------------------------------------------------------------------------------------
-   // ---------------------------------------------------------------------------------------
    gpuContext gpu    = amoebaGpu->gpuContext;
 #ifdef AMOEBA_DEBUG
    static const char* methodName = "computeCudaAmoebaPmeFixedEField";
    if( amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "\n%s\n", methodName ); (void) fflush( amoebaGpu->log );
@@ -414,15 +411,13 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
    // N2 debug array
    CUDAStream<float4>* debugArray             = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
-    memset( debugArray->_pSysStream[0],      0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
+    memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
    debugArray->Upload();
    // print intermediate results for the targetAtom 
    unsigned int targetAtom  = 0;
-#endif
    int maxPrint             = 3002;
    amoebaGpu->psE_Field->Download();
    (void) fprintf( amoebaGpu->log, "Recip EFields In\n" );
@@ -448,6 +443,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
    }
    (void) fflush( amoebaGpu->log );
    (void) fprintf( amoebaGpu->log, "Recip EFields End\n" );
+#endif
    kClearFields_3( amoebaGpu, 2 );

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
@@ -725,8 +725,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
    amoebaGpu->mutualInducedDone             = done;
    amoebaGpu->mutualInducedConverged        = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;
-#ifdef AMOEBA_DEBUG
-/*
    if( 0 ){
        std::vector<int> fileId;
        //fileId.push_back( 0 );
@@ -736,8 +734,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, outputVector );
        cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
     }
-*/
-#endif
   // ---------------------------------------------------------------------------------------
 }

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
@@ -389,18 +389,11 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
    if( amoebaGpu->multipoleNonbondedMethod == AMOEBA_NO_CUTOFF ){
        cudaComputeAmoebaElectrostatic( amoebaGpu );
+        // map torques to forces
+        cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpu, amoebaGpu->psTorque, amoebaGpu->psForce, amoebaGpu->gpuContext->psForce4 );
    } else {
        cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
    }
-    // map torques to forces
-    cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpu, amoebaGpu->psTorque, amoebaGpu->psForce, amoebaGpu->gpuContext->psForce4 );
-    if( 0 && amoebaGpu->log ){
-        (void) fprintf( amoebaGpu->log, "Done mapping torques -> forces%s\n", methodName.c_str() ); fflush( NULL );
-        (void) fflush( NULL );
-    }
 }
 #undef AMOEBA_DEBUG
--- a/plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
+++ b/plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
@@ -3608,12 +3608,14 @@ Integrator* readAmoebaParameterFile( const std::string& inputParameterFile, MapS
               }
            } else if( field == "AmoebaRealPmeForce"                   || 
                       field == "AmoebaKSpacePmeForce"                 ||
+                       field == "AmoebaDirAndSForce"                   ||
                       field == "AmoebaSelfPmeForce"                   ){
                std::vector< std::vector<double> > vectorOfDoubleVectors;
                readVectorOfDoubleVectors( filePtr, tokens, vectorOfDoubleVectors, &lineCount, field, log );
                supplementary[field] = vectorOfDoubleVectors;
            } else if( field == "AmoebaRealPmeEnergy"                  || 
                       field == "AmoebaKSpacePmeEnergy"                ||
+                       field == "AmoebaDirAndSEnergy"                  ||
                       field == "AmoebaSelfPmeEnergy"                  ){
                double value = atof( tokens[1].c_str() );
                std::vector< std::vector<double> > vectorOfDoubleVectors;