Commit 09ae36bd authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Turn off debugging code

parent 0dd63d02
...@@ -3975,6 +3975,141 @@ void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle, ...@@ -3975,6 +3975,141 @@ void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle,
} }
} }
/**---------------------------------------------------------------------------------------

   Scan a Cuda<float> array for nans and out-of-range entries

   (1) download the array contents from the gpu
   (2) flag every entry that is a nan or whose absolute value exceeds 1.0e+08;
       flagged entries are written to the log and, if any were found, the program exits
   (3) write the entry with the largest absolute value (and its particle index) to the log
   (4) entries of particles within 2 indices of 'targetParticle' are also written to the
       log, but are not counted as errors; edit 'targetParticle' to track a given particle

   @param numberOfParticles        number of particles in array
   @param entriesPerParticle       number of entries per particle in array
   @param array                    Cuda<float> array to check
   @param order                    particle order index array; if NULL, the particle index
                                   itself is reported
   @param iteration                tracking iteration (only used in the log output)
   @param idString                 id string prefixed to each log line
   @param log                      logging file reference

   --------------------------------------------------------------------------------------- */

void checkForNans( int numberOfParticles, int entriesPerParticle,
                   CUDAStream<float>* array, int* order, int iteration, std::string idString, FILE* log )
{

    // ---------------------------------------------------------------------------------------

    // particle whose neighborhood is always echoed to the log;
    // with the value below no particle index >= 0 falls inside the window

    const int targetParticle = -9782;

    // ---------------------------------------------------------------------------------------

    array->Download();

    int   errorCount      = 0;
    float largestValue    = 0.0;
    int   largestParticle = 0;

    for( int particle = 0; particle < numberOfParticles; particle++ ){

        const int  reportedIndex = order ? order[particle] : particle;
        const bool tracked       = abs( particle - targetParticle ) < 3;
        bool       lineStarted   = false;

        for( int entry = 0; entry < entriesPerParticle; entry++ ){

            const float value = array->_pSysData[entriesPerParticle*particle + entry];

            // a nan is the only value that compares unequal to itself

            const bool suspect = ( value != value ) || ( fabs( value ) > 1.0e+8 );

            if( suspect || tracked ){

                // the line prefix is written once per particle

                if( !lineStarted ){
                    (void) fprintf( log, "%s %6d %6d ", idString.c_str(), iteration, particle );
                    lineStarted = true;
                }
                (void) fprintf( log, "[%6d %6d %15.7e] ", entry, reportedIndex, value );

                if( suspect ){
                    errorCount++;
                }
            }

            // a nan never wins this comparison, so it cannot become the reported maximum

            if( fabs( value ) > fabs( largestValue ) ){
                largestValue    = value;
                largestParticle = particle;
            }
        }

        if( lineStarted ){
            fprintf( log, "\n" );
        }
    }

    if( errorCount != 0 ){
        (void) fprintf( log, "%s %6d errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
        exit(-1);
    }

    (void) fprintf( log, "%s %6d no errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
}
/**---------------------------------------------------------------------------------------

   Scan a Cuda<float4> array for nans and out-of-range entries

   (1) download the array contents from the gpu
   (2) flag every component (x, y, z, w) that is a nan or whose absolute value exceeds
       1.0e+08; flagged components are written to the log and, if any were found, the
       program exits
   (3) write the component with the largest absolute value (and its particle index) to the log
   (4) components of particles within 2 indices of 'targetParticle' are also written to the
       log, but are not counted as errors; edit 'targetParticle' to track a given particle

   @param numberOfParticles        number of float4 entries in array
   @param array                    Cuda<float4> array to check
   @param order                    particle order index array; if NULL, the particle index
                                   itself is reported
   @param iteration                tracking iteration (only used in the log output)
   @param idString                 id string prefixed to each log line
   @param log                      logging file reference

   --------------------------------------------------------------------------------------- */

void checkForNansFloat4( int numberOfParticles, CUDAStream<float4>* array, int* order, int iteration, std::string idString, FILE* log )
{

    // ---------------------------------------------------------------------------------------

    const int componentsPerParticle = 4;

    // particle whose neighborhood is always echoed to the log;
    // with the value below no particle index >= 0 falls inside the window

    const int targetParticle = -9782;

    // ---------------------------------------------------------------------------------------

    array->Download();

    int   errorCount      = 0;
    float largestValue    = 0.0;
    int   largestParticle = 0;

    for( int particle = 0; particle < numberOfParticles; particle++ ){

        const int  reportedIndex = order ? order[particle] : particle;
        const bool tracked       = abs( particle - targetParticle ) < 3;
        bool       lineStarted   = false;

        // unpack the float4 so that the components can be visited in a loop

        const float components[4] = { array->_pSysData[particle].x,
                                      array->_pSysData[particle].y,
                                      array->_pSysData[particle].z,
                                      array->_pSysData[particle].w };

        for( int entry = 0; entry < componentsPerParticle; entry++ ){

            const float value = components[entry];

            // a nan is the only value that compares unequal to itself

            const bool suspect = ( value != value ) || ( fabs( value ) > 1.0e+8 );

            if( suspect || tracked ){

                // the line prefix is written once per particle

                if( !lineStarted ){
                    (void) fprintf( log, "%s %6d %6d ", idString.c_str(), iteration, particle );
                    lineStarted = true;
                }
                (void) fprintf( log, "[%6d %6d %15.7e] ", entry, reportedIndex, value );

                if( suspect ){
                    errorCount++;
                }
            }

            // a nan never wins this comparison, so it cannot become the reported maximum

            if( fabs( value ) > fabs( largestValue ) ){
                largestValue    = value;
                largestParticle = particle;
            }
        }

        if( lineStarted ){
            fprintf( log, "\n" );
        }
    }

    if( errorCount != 0 ){
        (void) fprintf( log, "%s %6d errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
        exit(-1);
    }

    (void) fprintf( log, "%s %6d no errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, largestValue, largestParticle );
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Load contents of arrays into vector Load contents of arrays into vector
......
...@@ -152,6 +152,10 @@ extern void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerPartic ...@@ -152,6 +152,10 @@ extern void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerPartic
extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector, int* order, float conversion ); extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector, int* order, float conversion );
extern void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId, VectorOfDoubleVectors& outputVector ); extern void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId, VectorOfDoubleVectors& outputVector );
extern void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, float initValue ); extern void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, float initValue );
extern void checkForNans( int numberOfParticles, int entriesPerParticle,
CUDAStream<float>* array, int* order, int iteration, std::string idString, FILE* log );
extern void checkForNansFloat4( int numberOfParticles, CUDAStream<float4>* array, int* order, int iteration, std::string idString, FILE* log );
extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float>* fieldToClear ); extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float>* fieldToClear );
......
...@@ -915,7 +915,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe ...@@ -915,7 +915,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
amoebaGpu->mutualInducedDone = done; amoebaGpu->mutualInducedDone = done;
amoebaGpu->mutualInducedConverged = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1; amoebaGpu->mutualInducedConverged = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;
if( amoebaGpu->log ){ if( 0 && amoebaGpu->log ){
trackMutualInducedIterations( amoebaGpu, iteration ); trackMutualInducedIterations( amoebaGpu, iteration );
} }
......
...@@ -542,7 +542,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu ...@@ -542,7 +542,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
amoebaGpu->psCurrentEpsilon->_pDevData ); amoebaGpu->psCurrentEpsilon->_pDevData );
LAUNCHERROR("kReduceMutualInducedFieldDelta"); LAUNCHERROR("kReduceMutualInducedFieldDelta");
if( amoebaGpu->log ){ if( 0 && amoebaGpu->log ){
trackMutualInducedIterations( amoebaGpu, iteration); trackMutualInducedIterations( amoebaGpu, iteration);
} }
......
...@@ -686,7 +686,7 @@ void) fflush( amoebaGpu->log ); ...@@ -686,7 +686,7 @@ void) fflush( amoebaGpu->log );
amoebaGpu->psCurrentEpsilon->_pDevData ); amoebaGpu->psCurrentEpsilon->_pDevData );
LAUNCHERROR("kReducePmeMutualInducedFieldDelta"); LAUNCHERROR("kReducePmeMutualInducedFieldDelta");
if( amoebaGpu->log ){ if( 0 && amoebaGpu->log ){
trackMutualInducedIterations( amoebaGpu, iteration); trackMutualInducedIterations( amoebaGpu, iteration);
} }
...@@ -770,8 +770,10 @@ void) fflush( amoebaGpu->log ); ...@@ -770,8 +770,10 @@ void) fflush( amoebaGpu->log );
amoebaGpu->psCurrentEpsilon->_pSysData[2], done ); amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
#endif #endif
// exit if nan // exit if nan
if( amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
if( 0 && amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
(void) fprintf( amoebaGpu->log, "PME MI iteration=%3d eps is nan -- exiting.\n", iteration ); (void) fprintf( amoebaGpu->log, "PME MI iteration=%3d eps is nan -- exiting.\n", iteration );
exit(0); exit(0);
} }
...@@ -793,6 +795,12 @@ void) fflush( amoebaGpu->log ); ...@@ -793,6 +795,12 @@ void) fflush( amoebaGpu->log );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector ); cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
} }
if( 0 ){
static int iteration = 0;
checkForNans( gpu->natoms, 3, amoebaGpu->psInducedDipole, gpu->psAtomIndex->_pSysData, ++iteration, "CudaPmeMI", stderr );
checkForNans( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, gpu->psAtomIndex->_pSysData, iteration, "CudaPmeMIPolar", stderr );
}
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
} }
......
...@@ -553,6 +553,17 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG ...@@ -553,6 +553,17 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
// compute lab frame moments // compute lab frame moments
if( 0 ){
static int iteration = 0;
gpuContext gpu = amoebaGpu->gpuContext;
checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPreLabCoord", stderr );
}
if( 0 ){
static int iteration = 0;
gpuContext gpu = amoebaGpu->gpuContext;
checkForNansFloat4( gpu->natoms, gpu->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPreForce", stderr );
}
cudaComputeAmoebaLabFrameMoments( amoebaGpu ); cudaComputeAmoebaLabFrameMoments( amoebaGpu );
if( 0 ){ if( 0 ){
...@@ -622,6 +633,12 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG ...@@ -622,6 +633,12 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
} else { } else {
cudaComputeAmoebaPmeElectrostatic( amoebaGpu ); cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
} }
if( 0 ){
static int iteration = 0;
gpuContext gpu = amoebaGpu->gpuContext;
checkForNansFloat4( gpu->natoms, gpu->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPstForce", stderr );
}
} }
#undef AMOEBA_DEBUG #undef AMOEBA_DEBUG
...@@ -533,6 +533,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -533,6 +533,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(Vdw14_7Particle)), maxThreads); threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(Vdw14_7Particle)), maxThreads);
} }
if( 0 ){
static int iteration = 0;
checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, ++iteration, "\n\nzCoordPreCopyVdw", stderr );
}
kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates ); kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates );
kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates ); kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates );
...@@ -669,8 +674,8 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -669,8 +674,8 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
#ifdef AMOEBA_DEBUG_PRINT #ifdef AMOEBA_DEBUG_PRINT
if( amoebaGpu->log ){ if( amoebaGpu->log ){
static int iteration = 0;
(void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution\n" ); (void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution step=%d\n", ++iteration );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -694,7 +699,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -694,7 +699,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
(void) fprintf( amoebaGpu->log,"\n" ); (void) fprintf( amoebaGpu->log,"\n" );
} }
#endif #endif
/*
amoebaGpu->psWorkArray_3_2->Download(); amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psWorkArray_3_1->Download(); amoebaGpu->psWorkArray_3_1->Download();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 ) //for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
...@@ -711,15 +716,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -711,15 +716,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] ); amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] );
} }
} }
*/
} }
#endif #endif
if( 0 ){
static int iteration = 0;
checkForNansFloat4( gpu->natoms, amoebaGpu->gpuContext->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "PreVdw", stderr );
checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, iteration, "zCoordPreVdw", stderr );
}
kReduceVdw14_7( amoebaGpu, amoebaGpu->psWorkArray_3_2 ); kReduceVdw14_7( amoebaGpu, amoebaGpu->psWorkArray_3_2 );
if( 0 ){
static int iteration = 0;
checkForNans( gpu->natoms, 3, amoebaGpu->psWorkArray_3_2, gpu->psAtomIndex->_pSysData, ++iteration, "Vdw32", stderr );
}
kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 ); kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 );
kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 ); kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 );
if( 1 ){ if( 0 ){
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* psTempForce = new CUDAStream<float4>(paddedNumberOfAtoms, 1, "psTempForce"); CUDAStream<float4>* psTempForce = new CUDAStream<float4>(paddedNumberOfAtoms, 1, "psTempForce");
kClearFloat4( amoebaGpu, paddedNumberOfAtoms, psTempForce ); kClearFloat4( amoebaGpu, paddedNumberOfAtoms, psTempForce );
...@@ -735,6 +753,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -735,6 +753,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
//exit(0); //exit(0);
} }
if( 0 ){
static int iteration = 0;
checkForNansFloat4( gpu->natoms, amoebaGpu->gpuContext->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "VdwForce", stderr );
}
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
delete debugArray; delete debugArray;
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment