Added debugging code

e3196201 · Mark Friedrichs · 227aa72a · e3196201 · e3196201
Commit e3196201 authored Sep 15, 2010 by Mark Friedrichs
2 changed files
--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
@@ -6,7 +6,7 @@
 #include "amoebaCudaKernels.h"
 #include "kCalculateAmoebaCudaUtilities.h"

-//#define AMOEBA_DEBUG
+#define AMOEBA_DEBUG

 static __constant__ cudaGmxSimulation cSim;
 static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
@@ -35,19 +35,6 @@ static int const PScaleIndex            =  0;
 static int const DScaleIndex            =  1; 
 static int const UScaleIndex            =  2; 
 static int const MScaleIndex            =  3;
-//static int const Scale3Index            =  4;
-//static int const Scale5Index            =  5;
-//static int const Scale7Index            =  6;
-//static int const Scale9Index            =  7;
-//static int const Ddsc30Index            =  8;
-//static int const Ddsc31Index            =  9;
-//static int const Ddsc32Index            = 10; 
-//static int const Ddsc50Index            = 11;
-//static int const Ddsc51Index            = 12;
-//static int const Ddsc52Index            = 13; 
-//static int const Ddsc70Index            = 14;
-//static int const Ddsc71Index            = 15;
-//static int const Ddsc72Index            = 16;
 static int const LastScalingIndex       =  4;

 struct PmeDirectElectrostaticParticle {
@@ -296,6 +283,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
                float temp3    = -3.0f * damp * expdamp / r2;
                float temp5    = -damp;
                float temp7    = -0.2f - 0.6f*damp;
+
                ddsc3[1]       = temp3 * xr;
                ddsc3[2]       = temp3 * yr;
                ddsc3[3]       = temp3 * zr;
@@ -1201,6 +1189,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
  
        int maxPrint        = 1400;
        float conversion    = 1.0f/41.84f;
+        float forceSum[3]   = { 0.0f, 0.0f, 0.0f};
        for( int ii = 0; ii < gpu->natoms; ii++ ){
            (void) fprintf( amoebaGpu->log, "%5d ", ii); 
  
@@ -1213,6 +1202,10 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
                            conversion*amoebaGpu->psForce->_pSysStream[0][indexOffset+1],
                            conversion*amoebaGpu->psForce->_pSysStream[0][indexOffset+2] );

+            forceSum[0]         += amoebaGpu->psForce->_pSysStream[0][indexOffset];
+            forceSum[1]         += amoebaGpu->psForce->_pSysStream[0][indexOffset+1];
+            forceSum[2]         += amoebaGpu->psForce->_pSysStream[0][indexOffset+2];
+
            // torque
  
            (void) fprintf( amoebaGpu->log,"PmeDirectElectrostaticT [%16.9e %16.9e %16.9e] ",
@@ -1225,6 +1218,17 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
                ii = gpu->natoms - maxPrint;
            }
        }
+        gpu->psEnergy->Download();
+        double energy = 0.0;
+        for( unsigned int ii = 0; ii < gpu->sim.energyOutputBuffers; ii++ ){
+            if( (*gpu->psEnergy)[ii] !=  (*gpu->psEnergy)[ii] || (*gpu->psEnergy)[ii] == std::numeric_limits<double>::infinity() || (*gpu->psEnergy)[ii] == -std::numeric_limits<double>::infinity() ){
+                (void) fprintf( amoebaGpu->log,"Energy nan at index=%d\n", ii );
+            } else {
+               energy += (*gpu->psEnergy)[ii];
+            }
+        }   
+        (void) fprintf( amoebaGpu->log,"Force sums: [%16.9e %16.9e %16.9e] Energy=%16.9e\n", forceSum[0], forceSum[1], forceSum[2], energy );
+
        if( 1 ){
            (void) fprintf( amoebaGpu->log,"DebugElec\n" );
            int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
@@ -205,13 +205,13 @@ if( atomI == targetAtom ){
    debugArray[index].x                = mask ? torque[0][0] : 0.0f;
    debugArray[index].y                = mask ? torque[0][1] : 0.0f;
    debugArray[index].z                = mask ? torque[0][2] : 0.0f;
-    debugArray[index].w                = blockId;
+    debugArray[index].w                = mask ? energy       : 0.0f;

    index                             += cAmoebaSim.paddedNumberOfAtoms;
    debugArray[index].x                = mask ? torque[0][0] : 0.0f;
    debugArray[index].y                = mask ? torque[0][1] : 0.0f;
    debugArray[index].z                = mask ? torque[0][2] : 0.0f;
-    debugArray[index].w                = blockId;
+    debugArray[index].w                = (float) (blockIdx.x * blockDim.x + threadIdx.x);

    for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
        index                             += cAmoebaSim.paddedNumberOfAtoms;
@@ -364,13 +364,13 @@ if( atomI == targetAtom  || atomJ == targetAtom ){
    debugArray[index].x                = mask ? torque[indexI][0] : 0.0f;
    debugArray[index].y                = mask ? torque[indexI][1] : 0.0f;
    debugArray[index].z                = mask ? torque[indexI][2] : 0.0f;
-    debugArray[index].w                = blockId;
+    debugArray[index].w                = mask ? energy       : 0.0f;

    index                             += cAmoebaSim.paddedNumberOfAtoms;
    debugArray[index].x                = mask ? torque[indexJ][0] : 0.0f;
    debugArray[index].y                = mask ? torque[indexJ][1] : 0.0f;
    debugArray[index].z                = mask ? torque[indexJ][2] : 0.0f;
-    debugArray[index].w                = blockId;
+    debugArray[index].w                = (float) (blockIdx.x * blockDim.x + threadIdx.x);

    for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
        index                             += cAmoebaSim.paddedNumberOfAtoms;
@@ -460,25 +460,24 @@ if( atomI == targetAtom  || atomJ == targetAtom ){
        debugArray[index].y                = (float) atomJ;
        debugArray[index].z                = (float) y;
        debugArray[index].w                = blockId;
-        index                             += cAmoebaSim.paddedNumberOfAtoms;

+        index                             += cAmoebaSim.paddedNumberOfAtoms;
        debugArray[index].x                = mask ? forceSign*force[0]     : 0.0f;
        debugArray[index].y                = mask ? forceSign*force[1]     : 0.0f;
        debugArray[index].z                = mask ? forceSign*force[2]     : 0.0f;
        debugArray[index].w                = blockId;

-
        index                             += cAmoebaSim.paddedNumberOfAtoms;
        debugArray[index].x                = mask ? torque[indexI][0] : 0.0f;
        debugArray[index].y                = mask ? torque[indexI][1] : 0.0f;
        debugArray[index].z                = mask ? torque[indexI][2] : 0.0f;
-        debugArray[index].w                = blockId;
+        debugArray[index].w                = energy;

        index                             += cAmoebaSim.paddedNumberOfAtoms;
        debugArray[index].x                = mask ? torque[indexJ][0] : 0.0f;
        debugArray[index].y                = mask ? torque[indexJ][1] : 0.0f;
        debugArray[index].z                = mask ? torque[indexJ][2] : 0.0f;
-        debugArray[index].w                = blockId;
+        debugArray[index].w                = (float) (blockIdx.x * blockDim.x + threadIdx.x);

        for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
            index                             += cAmoebaSim.paddedNumberOfAtoms;
@@ -611,5 +610,6 @@ if( atomI == targetAtom  || atomJ == targetAtom ){

        pos++;
    }
+//printf( "Hello thread: %d %d %d %d\n", blockIdx.x * blockDim.x + threadIdx.x, blockIdx.x, blockDim.x, threadIdx.x );
    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy;
 }