Commit 09ae36bd authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Turn off debugging code

parent 0dd63d02
......@@ -3975,6 +3975,141 @@ void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle,
}
}
/**---------------------------------------------------------------------------------------
Check for nans in Cuda array
(1) download data from gpu
(2) check for nans and large values (> 1.0e+08) in array, and report if any found and exit
(3) report largest entry in absolute value, if no problems detected
(4) also by editing 'targetParticle', can track values around that index
@param numberOfParticles number of entries in array
@param entriesPerParticle entries/particles in array
@param array Cuda<float> array to check
@param order particle order index array
@param iteration tracking iteration
@param idString id string for check
@param log loggin file references
--------------------------------------------------------------------------------------- */
void checkForNans( int numberOfParticles, int entriesPerParticle,
CUDAStream<float>* array, int* order, int iteration, std::string idString, FILE* log )
{
// ---------------------------------------------------------------------------------------
array->Download();
int orderIndex = 0;
int errors = 0;
float maxValue = 0.0;
int maxIndex = 0;
int targetParticle = -9782;
for( int ii = 0; ii < numberOfParticles; ii++ ){
if( order ){
orderIndex = order[ii];
} else {
orderIndex = ii;
}
int newLine = 0;
for( int jj = 0; jj < entriesPerParticle; jj++ ) {
if( array->_pSysData[entriesPerParticle*ii+jj] != array->_pSysData[entriesPerParticle*ii+jj] ||
fabs( array->_pSysData[entriesPerParticle*ii+jj] ) > 1.0e+8 || abs( ii - targetParticle ) < 3 ){
if( newLine == 0 )(void) fprintf( log, "%s %6d %6d ", idString.c_str(), iteration, ii );
(void) fprintf( log, "[%6d %6d %15.7e] ",
jj, orderIndex, array->_pSysData[entriesPerParticle*ii+jj] );
newLine++;
if( array->_pSysData[entriesPerParticle*ii+jj] != array->_pSysData[entriesPerParticle*ii+jj] ||
fabs( array->_pSysData[entriesPerParticle*ii+jj] ) > 1.0e+8 ){
errors += 1;
}
}
if( fabs( array->_pSysData[entriesPerParticle*ii+jj] ) > fabs( maxValue ) ){
maxValue = array->_pSysData[entriesPerParticle*ii+jj];
maxIndex = ii;
}
}
if( newLine ) fprintf( log, "\n" );
}
if( errors == 0 ){
(void) fprintf( log, "%s %6d no errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, maxValue, maxIndex );
} else {
(void) fprintf( log, "%s %6d errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, maxValue, maxIndex );
exit(-1);
}
}
/**---------------------------------------------------------------------------------------
Check for nans in Cuda<float4> array
(1) download data from gpu
(2) check for nans and large values (> 1.0e+08) in array, and report if any found and exit
(3) report largest entry in absolute value, if no problems detected
(4) also by editing 'targetParticle', can track values around that index
@param numberOfParticles number of entries in array
@param array Cuda<float4> array to check
@param order particle order index array
@param iteration tracking iteration
@param idString id string for check
@param log loggin file references
--------------------------------------------------------------------------------------- */
void checkForNansFloat4( int numberOfParticles, CUDAStream<float4>* array, int* order, int iteration, std::string idString, FILE* log )
{
// ---------------------------------------------------------------------------------------
array->Download();
int orderIndex = 0;
int errors = 0;
float maxValue = 0.0;
int maxIndex = 0;
int entriesPerParticle = 4;
int targetParticle = -9782;
float values[4];
for( int ii = 0; ii < numberOfParticles; ii++ ){
if( order ){
orderIndex = order[ii];
} else {
orderIndex = ii;
}
values[0] = array->_pSysData[ii].x;
values[1] = array->_pSysData[ii].y;
values[2] = array->_pSysData[ii].z;
values[3] = array->_pSysData[ii].w;
int newLine = 0;
for( int jj = 0; jj < entriesPerParticle; jj++ ) {
if( values[jj] != values[jj] || fabs( values[jj] ) > 1.0e+8 || abs( ii - targetParticle ) < 3 ){
if( newLine == 0 )(void) fprintf( log, "%s %6d %6d ", idString.c_str(), iteration, ii );
newLine++;
(void) fprintf( log, "[%6d %6d %15.7e] ", jj, orderIndex, values[jj] );
if( values[jj] != values[jj] || fabs( values[jj] ) > 1.0e+8 ){
errors += 1;
}
}
if( fabs( values[jj] ) > fabs( maxValue ) ){
maxValue = values[jj];
maxIndex = ii;
}
}
if( newLine ) fprintf( log, "\n" );
}
if( errors == 0 ){
(void) fprintf( log, "%s %6d no errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, maxValue, maxIndex );
} else {
(void) fprintf( log, "%s %6d errors detected maxValue=%15.7e %6d.\n", idString.c_str(), iteration, maxValue, maxIndex );
exit(-1);
}
}
/**---------------------------------------------------------------------------------------
Load contents of arrays into vector
......
......@@ -152,6 +152,10 @@ extern void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerPartic
extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector, int* order, float conversion );
extern void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId, VectorOfDoubleVectors& outputVector );
extern void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, float initValue );
extern void checkForNans( int numberOfParticles, int entriesPerParticle,
CUDAStream<float>* array, int* order, int iteration, std::string idString, FILE* log );
extern void checkForNansFloat4( int numberOfParticles, CUDAStream<float4>* array, int* order, int iteration, std::string idString, FILE* log );
extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float>* fieldToClear );
......
......@@ -915,7 +915,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
amoebaGpu->mutualInducedDone = done;
amoebaGpu->mutualInducedConverged = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;
if( amoebaGpu->log ){
if( 0 && amoebaGpu->log ){
trackMutualInducedIterations( amoebaGpu, iteration );
}
......
......@@ -542,7 +542,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
amoebaGpu->psCurrentEpsilon->_pDevData );
LAUNCHERROR("kReduceMutualInducedFieldDelta");
if( amoebaGpu->log ){
if( 0 && amoebaGpu->log ){
trackMutualInducedIterations( amoebaGpu, iteration);
}
......
......@@ -686,7 +686,7 @@ void) fflush( amoebaGpu->log );
amoebaGpu->psCurrentEpsilon->_pDevData );
LAUNCHERROR("kReducePmeMutualInducedFieldDelta");
if( amoebaGpu->log ){
if( 0 && amoebaGpu->log ){
trackMutualInducedIterations( amoebaGpu, iteration);
}
......@@ -770,8 +770,10 @@ void) fflush( amoebaGpu->log );
amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
(void) fflush( amoebaGpu->log );
#endif
// exit if nan
if( amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
if( 0 && amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
(void) fprintf( amoebaGpu->log, "PME MI iteration=%3d eps is nan -- exiting.\n", iteration );
exit(0);
}
......@@ -793,6 +795,12 @@ void) fflush( amoebaGpu->log );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
}
if( 0 ){
static int iteration = 0;
checkForNans( gpu->natoms, 3, amoebaGpu->psInducedDipole, gpu->psAtomIndex->_pSysData, ++iteration, "CudaPmeMI", stderr );
checkForNans( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, gpu->psAtomIndex->_pSysData, iteration, "CudaPmeMIPolar", stderr );
}
// ---------------------------------------------------------------------------------------
}
......
......@@ -553,6 +553,17 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
// compute lab frame moments
if( 0 ){
static int iteration = 0;
gpuContext gpu = amoebaGpu->gpuContext;
checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPreLabCoord", stderr );
}
if( 0 ){
static int iteration = 0;
gpuContext gpu = amoebaGpu->gpuContext;
checkForNansFloat4( gpu->natoms, gpu->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPreForce", stderr );
}
cudaComputeAmoebaLabFrameMoments( amoebaGpu );
if( 0 ){
......@@ -622,6 +633,12 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
} else {
cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
}
if( 0 ){
static int iteration = 0;
gpuContext gpu = amoebaGpu->gpuContext;
checkForNansFloat4( gpu->natoms, gpu->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "MultipoleForcesPstForce", stderr );
}
}
#undef AMOEBA_DEBUG
......@@ -533,6 +533,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(Vdw14_7Particle)), maxThreads);
}
if( 0 ){
static int iteration = 0;
checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, ++iteration, "\n\nzCoordPreCopyVdw", stderr );
}
kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates );
kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates );
......@@ -669,8 +674,8 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
#ifdef AMOEBA_DEBUG_PRINT
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution\n" );
static int iteration = 0;
(void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution step=%d\n", ++iteration );
(void) fflush( amoebaGpu->log );
#ifdef AMOEBA_DEBUG
......@@ -694,7 +699,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
(void) fprintf( amoebaGpu->log,"\n" );
}
#endif
/*
amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psWorkArray_3_1->Download();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
......@@ -711,15 +716,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] );
}
}
*/
}
#endif
if( 0 ){
static int iteration = 0;
checkForNansFloat4( gpu->natoms, amoebaGpu->gpuContext->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "PreVdw", stderr );
checkForNansFloat4( gpu->natoms, gpu->psPosq4, gpu->psAtomIndex->_pSysData, iteration, "zCoordPreVdw", stderr );
}
kReduceVdw14_7( amoebaGpu, amoebaGpu->psWorkArray_3_2 );
if( 0 ){
static int iteration = 0;
checkForNans( gpu->natoms, 3, amoebaGpu->psWorkArray_3_2, gpu->psAtomIndex->_pSysData, ++iteration, "Vdw32", stderr );
}
kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 );
kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, amoebaGpu->gpuContext->psForce4 );
if( 1 ){
if( 0 ){
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* psTempForce = new CUDAStream<float4>(paddedNumberOfAtoms, 1, "psTempForce");
kClearFloat4( amoebaGpu, paddedNumberOfAtoms, psTempForce );
......@@ -735,6 +753,11 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
//exit(0);
}
if( 0 ){
static int iteration = 0;
checkForNansFloat4( gpu->natoms, amoebaGpu->gpuContext->psForce4, gpu->psAtomIndex->_pSysData, ++iteration, "VdwForce", stderr );
}
#ifdef AMOEBA_DEBUG
delete debugArray;
#endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment