Commit e3196201 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Added debugging code

parent 227aa72a
......@@ -6,7 +6,7 @@
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
#define AMOEBA_DEBUG
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
......@@ -35,19 +35,6 @@ static int const PScaleIndex = 0;
static int const DScaleIndex = 1;
static int const UScaleIndex = 2;
static int const MScaleIndex = 3;
//static int const Scale3Index = 4;
//static int const Scale5Index = 5;
//static int const Scale7Index = 6;
//static int const Scale9Index = 7;
//static int const Ddsc30Index = 8;
//static int const Ddsc31Index = 9;
//static int const Ddsc32Index = 10;
//static int const Ddsc50Index = 11;
//static int const Ddsc51Index = 12;
//static int const Ddsc52Index = 13;
//static int const Ddsc70Index = 14;
//static int const Ddsc71Index = 15;
//static int const Ddsc72Index = 16;
static int const LastScalingIndex = 4;
struct PmeDirectElectrostaticParticle {
......@@ -296,6 +283,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float temp3 = -3.0f * damp * expdamp / r2;
float temp5 = -damp;
float temp7 = -0.2f - 0.6f*damp;
ddsc3[1] = temp3 * xr;
ddsc3[2] = temp3 * yr;
ddsc3[3] = temp3 * zr;
......@@ -1201,6 +1189,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
int maxPrint = 1400;
float conversion = 1.0f/41.84f;
float forceSum[3] = { 0.0f, 0.0f, 0.0f};
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
......@@ -1213,6 +1202,10 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
conversion*amoebaGpu->psForce->_pSysStream[0][indexOffset+1],
conversion*amoebaGpu->psForce->_pSysStream[0][indexOffset+2] );
forceSum[0] += amoebaGpu->psForce->_pSysStream[0][indexOffset];
forceSum[1] += amoebaGpu->psForce->_pSysStream[0][indexOffset+1];
forceSum[2] += amoebaGpu->psForce->_pSysStream[0][indexOffset+2];
// torque
(void) fprintf( amoebaGpu->log,"PmeDirectElectrostaticT [%16.9e %16.9e %16.9e] ",
......@@ -1225,6 +1218,17 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
ii = gpu->natoms - maxPrint;
}
}
gpu->psEnergy->Download();
double energy = 0.0;
for( unsigned int ii = 0; ii < gpu->sim.energyOutputBuffers; ii++ ){
if( (*gpu->psEnergy)[ii] != (*gpu->psEnergy)[ii] || (*gpu->psEnergy)[ii] == std::numeric_limits<double>::infinity() || (*gpu->psEnergy)[ii] == -std::numeric_limits<double>::infinity() ){
(void) fprintf( amoebaGpu->log,"Energy nan at index=%d\n", ii );
} else {
energy += (*gpu->psEnergy)[ii];
}
}
(void) fprintf( amoebaGpu->log,"Force sums: [%16.9e %16.9e %16.9e] Energy=%16.9e\n", forceSum[0], forceSum[1], forceSum[2], energy );
if( 1 ){
(void) fprintf( amoebaGpu->log,"DebugElec\n" );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
......
......@@ -205,13 +205,13 @@ if( atomI == targetAtom ){
debugArray[index].x = mask ? torque[0][0] : 0.0f;
debugArray[index].y = mask ? torque[0][1] : 0.0f;
debugArray[index].z = mask ? torque[0][2] : 0.0f;
debugArray[index].w = blockId;
debugArray[index].w = mask ? energy : 0.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[0][0] : 0.0f;
debugArray[index].y = mask ? torque[0][1] : 0.0f;
debugArray[index].z = mask ? torque[0][2] : 0.0f;
debugArray[index].w = blockId;
debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x);
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
......@@ -364,13 +364,13 @@ if( atomI == targetAtom || atomJ == targetAtom ){
debugArray[index].x = mask ? torque[indexI][0] : 0.0f;
debugArray[index].y = mask ? torque[indexI][1] : 0.0f;
debugArray[index].z = mask ? torque[indexI][2] : 0.0f;
debugArray[index].w = blockId;
debugArray[index].w = mask ? energy : 0.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexJ][0] : 0.0f;
debugArray[index].y = mask ? torque[indexJ][1] : 0.0f;
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
debugArray[index].w = blockId;
debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x);
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
......@@ -460,25 +460,24 @@ if( atomI == targetAtom || atomJ == targetAtom ){
debugArray[index].y = (float) atomJ;
debugArray[index].z = (float) y;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? forceSign*force[0] : 0.0f;
debugArray[index].y = mask ? forceSign*force[1] : 0.0f;
debugArray[index].z = mask ? forceSign*force[2] : 0.0f;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexI][0] : 0.0f;
debugArray[index].y = mask ? torque[indexI][1] : 0.0f;
debugArray[index].z = mask ? torque[indexI][2] : 0.0f;
debugArray[index].w = blockId;
debugArray[index].w = energy;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexJ][0] : 0.0f;
debugArray[index].y = mask ? torque[indexJ][1] : 0.0f;
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
debugArray[index].w = blockId;
debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x);
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
......@@ -611,5 +610,6 @@ if( atomI == targetAtom || atomJ == targetAtom ){
pos++;
}
//printf( "Hello thread: %d %d %d %d\n", blockIdx.x * blockDim.x + threadIdx.x, blockIdx.x, blockDim.x, threadIdx.x );
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment