Turn off debugging code

09ae36bd · Mark Friedrichs · 0dd63d02 · 09ae36bd · 09ae36bd · 09ae36bd
Commit 09ae36bd authored Mar 29, 2011 by Mark Friedrichs
7 changed files
--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
@@ -3975,6 +3975,141 @@ void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle,
    }
 }
+/**---------------------------------------------------------------------------------------
+   Debugging aid: scan a CUDAStream<float> array for NaNs and out-of-range values
+      (1) call array->Download() (per the original notes, this fetches the data from the gpu)
+      (2) flag every entry that is a NaN or whose absolute value exceeds 1.0e+08;
+          each flagged entry is written to the log as [entry, order index, value]
+      (3) entries of particles within 2 indices of the hard-coded 'trackedParticle'
+          are also written to the log, but are not counted as errors
+      (4) a summary line with the entry of largest absolute value (and the particle
+          index it was found at) is always written
+      (5) if at least one entry was flagged, the process is terminated via exit(-1)
+   @param numberOfParticles    number of particles scanned
+   @param entriesPerParticle   number of consecutive floats stored per particle
+   @param array                CUDAStream<float> array to check
+   @param order                optional particle order index array; if NULL, the
+                               particle index itself is reported as the order index
+   @param iteration            tracking iteration (only echoed in the log output)
+   @param idString             id string prepended to every line written
+   @param log                  logging file reference
+   --------------------------------------------------------------------------------------- */
+void checkForNans( int numberOfParticles, int entriesPerParticle,
+                   CUDAStream<float>* array, int* order, int iteration, std::string idString, FILE* log )
+{
+    // ---------------------------------------------------------------------------------------
+    // edit this index to trace the values around a particular particle;
+    // the default is negative so that no particle matches
+    const int trackedParticle = -9782;
+    int   flaggedEntries      = 0;
+    float largestValue        = 0.0f;
+    int   largestParticle     = 0;
+    array->Download();
+    for( int particle = 0; particle < numberOfParticles; particle++ ){
+        const int reportedIndex = order ? order[particle] : particle;
+        bool lineOpen           = false;
+        for( int entry = 0; entry < entriesPerParticle; entry++ ){
+            const float value  = array->_pSysData[entriesPerParticle*particle + entry];
+            // a NaN is the only value that compares unequal to itself
+            const bool suspect = ( value != value ) || ( fabs( value ) > 1.0e+8 );
+            if( suspect || abs( particle - trackedParticle ) < 3 ){
+                // the line prefix is written once per particle, ahead of its first reported entry
+                if( !lineOpen ){
+                    (void) fprintf( log, "%s %6d %6d ", idString.c_str(), iteration, particle );
+                    lineOpen = true;
+                }
+                (void) fprintf( log, "[%6d %6d %15.7e] ", entry, reportedIndex, value );
+                if( suspect ){
+                    flaggedEntries++;
+                }
+            }
+            // keep the signed value of the entry with the largest magnitude seen so far
+            if( fabs( value ) > fabs( largestValue ) ){
+                largestValue    = value;
+                largestParticle = particle;
+            }
+        }
+        if( lineOpen ){
+            (void) fprintf( log, "\n" );
+        }
+    }
+    if( flaggedEntries ){
+        (void) fprintf( log, "%s %6d errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
+        exit(-1);
+    }
+    (void) fprintf( log, "%s %6d no errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
+}
+/**---------------------------------------------------------------------------------------
+   Debugging aid: scan a CUDAStream<float4> array for NaNs and out-of-range values
+      (1) call array->Download() (per the original notes, this fetches the data from the gpu)
+      (2) flag every x/y/z/w component that is a NaN or whose absolute value exceeds
+          1.0e+08; each flagged component is written to the log as
+          [component, order index, value]
+      (3) components of particles within 2 indices of the hard-coded 'trackedParticle'
+          are also written to the log, but are not counted as errors
+      (4) a summary line with the component of largest absolute value (and the particle
+          index it was found at) is always written
+      (5) if at least one component was flagged, the process is terminated via exit(-1)
+   @param numberOfParticles    number of float4 entries scanned
+   @param array                CUDAStream<float4> array to check
+   @param order                optional particle order index array; if NULL, the
+                               particle index itself is reported as the order index
+   @param iteration            tracking iteration (only echoed in the log output)
+   @param idString             id string prepended to every line written
+   @param log                  logging file reference
+   --------------------------------------------------------------------------------------- */
+void checkForNansFloat4( int numberOfParticles, CUDAStream<float4>* array, int* order, int iteration, std::string idString, FILE* log )
+{
+    // ---------------------------------------------------------------------------------------
+    // edit this index to trace the values around a particular particle;
+    // the default is negative so that no particle matches
+    const int trackedParticle    = -9782;
+    const int componentsPerEntry = 4;
+    int   flaggedEntries         = 0;
+    float largestValue           = 0.0f;
+    int   largestParticle        = 0;
+    array->Download();
+    for( int particle = 0; particle < numberOfParticles; particle++ ){
+        const int reportedIndex = order ? order[particle] : particle;
+        // unpack the float4 so that its components can be visited by index (x=0, y=1, z=2, w=3)
+        const float components[4] = { array->_pSysData[particle].x,
+                                      array->_pSysData[particle].y,
+                                      array->_pSysData[particle].z,
+                                      array->_pSysData[particle].w };
+        bool lineOpen = false;
+        for( int component = 0; component < componentsPerEntry; component++ ){
+            const float value  = components[component];
+            // a NaN is the only value that compares unequal to itself
+            const bool suspect = ( value != value ) || ( fabs( value ) > 1.0e+8 );
+            if( suspect || abs( particle - trackedParticle ) < 3 ){
+                // the line prefix is written once per particle, ahead of its first reported component
+                if( !lineOpen ){
+                    (void) fprintf( log, "%s %6d %6d ", idString.c_str(), iteration, particle );
+                    lineOpen = true;
+                }
+                (void) fprintf( log, "[%6d  %6d %15.7e] ", component, reportedIndex, value );
+                if( suspect ){
+                    flaggedEntries++;
+                }
+            }
+            // keep the signed value of the component with the largest magnitude seen so far
+            if( fabs( value ) > fabs( largestValue ) ){
+                largestValue    = value;
+                largestParticle = particle;
+            }
+        }
+        if( lineOpen ){
+            (void) fprintf( log, "\n" );
+        }
+    }
+    if( flaggedEntries ){
+        (void) fprintf( log, "%s %6d errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
+        exit(-1);
+    }
+    (void) fprintf( log, "%s %6d no errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
+}
 /**---------------------------------------------------------------------------------------
   Load contents of arrays into vector

--- a/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
@@ -152,6 +152,10 @@ extern void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerPartic
 extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector, int* order, float conversion );
 extern void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId, VectorOfDoubleVectors& outputVector );
 extern void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, float initValue );
+extern void checkForNans( int numberOfParticles, int entriesPerParticle,
+                          CUDAStream<float>* array, int* order, int iteration, std::string idString, FILE* log );
+extern void checkForNansFloat4( int numberOfParticles, CUDAStream<float4>* array, int* order, int iteration, std::string idString, FILE* log );
 extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float>* fieldToClear );

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
@@ -915,7 +915,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
    amoebaGpu->mutualInducedDone             = done;
    amoebaGpu->mutualInducedConverged        = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;
-    if( amoebaGpu->log ){
+    if( 0 && amoebaGpu->log ){
        trackMutualInducedIterations( amoebaGpu, iteration );
    }

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
@@ -542,7 +542,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
           amoebaGpu->psCurrentEpsilon->_pDevData );
        LAUNCHERROR("kReduceMutualInducedFieldDelta");
-        if( amoebaGpu->log ){
+        if( 0 && amoebaGpu->log ){
            trackMutualInducedIterations( amoebaGpu, iteration);
        }

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
@@ -686,7 +686,7 @@ void) fflush( amoebaGpu->log );
           amoebaGpu->psCurrentEpsilon->_pDevData );
        LAUNCHERROR("kReducePmeMutualInducedFieldDelta");
-        if( amoebaGpu->log ){
+        if( 0 && amoebaGpu->log ){
            trackMutualInducedIterations( amoebaGpu, iteration);
        }
@@ -770,8 +770,10 @@ void) fflush( amoebaGpu->log );
                        amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
        (void) fflush( amoebaGpu->log );
 #endif
        // exit if nan
-        if( amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
+        if( 0 && amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
            (void) fprintf( amoebaGpu->log, "PME MI iteration=%3d eps is nan -- exiting.\n", iteration );
            exit(0);
        }
@@ -793,6 +795,12 @@ void) fflush( amoebaGpu->log );
        cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
     }
+    if( 0 ){
+        static int iteration = 0;
+        checkForNans( gpu->natoms,  3, amoebaGpu->psInducedDipole, gpu->psAtomIndex->_pSysData,    ++iteration, "CudaPmeMI", stderr );
+        checkForNans( gpu->natoms,  3, amoebaGpu->psInducedDipolePolar, gpu->psAtomIndex->_pSysData, iteration, "CudaPmeMIPolar", stderr );
+     }
   // ---------------------------------------------------------------------------------------
 }

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
@@ -553,6 +553,17 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
    // compute lab frame moments
+    if( 0 ){
+        static int iteration = 0;
+        gpuContext gpu       = amoebaGpu->gpuContext;
+        checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPreLabCoord", stderr );
+     }   
+    if( 0 ){
+        static int iteration = 0;
+        gpuContext gpu       = amoebaGpu->gpuContext;
+        checkForNansFloat4( gpu->natoms, gpu->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPreForce", stderr );
+     }   
    cudaComputeAmoebaLabFrameMoments( amoebaGpu );
    if( 0 ){
@@ -622,6 +633,12 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
    } else {
        cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
    }
+    if( 0 ){
+        static int iteration = 0;
+        gpuContext gpu       = amoebaGpu->gpuContext;
+        checkForNansFloat4( gpu->natoms, gpu->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPstForce", stderr );
+     }   
 }
 #undef AMOEBA_DEBUG
--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
@@ -533,6 +533,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(Vdw14_7Particle)), maxThreads);
    }    
+    if( 0 ){
+        static int iteration = 0;
+        checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, ++iteration, "\n\nzCoordPreCopyVdw", stderr );
+     }   
    kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates );
    kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates );
@@ -669,8 +674,8 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
 #ifdef AMOEBA_DEBUG_PRINT
    if( amoebaGpu->log ){
+        static int iteration = 0;
-        (void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution\n" );
+        (void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution step=%d\n", ++iteration );
        (void) fflush( amoebaGpu->log );
 #ifdef AMOEBA_DEBUG
@@ -694,7 +699,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
            (void) fprintf( amoebaGpu->log,"\n" );
        }
 #endif
+/*
        amoebaGpu->psWorkArray_3_2->Download();
        amoebaGpu->psWorkArray_3_1->Download();
        //for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
@@ -711,15 +716,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
                                amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] );
            }
        }
+*/
    }
 #endif
+    if( 0 ){
+        static int iteration = 0;
+        checkForNansFloat4( gpu->natoms, amoebaGpu->gpuContext->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "PreVdw", stderr );
+        checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, iteration, "zCoordPreVdw", stderr );
+     }   
    kReduceVdw14_7( amoebaGpu, amoebaGpu->psWorkArray_3_2 );
+    if( 0 ){
+        static int iteration = 0;
+        checkForNans( gpu->natoms, 3, amoebaGpu->psWorkArray_3_2, gpu->psAtomIndex->_pSysData, ++iteration, "Vdw32", stderr );
+     }   
    kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 );
    kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 );
-    if( 1 ){
+    if( 0 ){
        int paddedNumberOfAtoms             = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
        CUDAStream<float4>* psTempForce     = new CUDAStream<float4>(paddedNumberOfAtoms, 1, "psTempForce");
        kClearFloat4( amoebaGpu, paddedNumberOfAtoms, psTempForce );
@@ -735,6 +753,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
        //exit(0);
     }
+    if( 0 ){
+        static int iteration = 0;
+        checkForNansFloat4( gpu->natoms, amoebaGpu->gpuContext->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "VdwForce", stderr );
+     }   
 #ifdef AMOEBA_DEBUG
    delete debugArray;
 #endif