"platforms/opencl/tests/TestOpenCLCompoundIntegrator.cpp" did not exist on "af95a5349e5de9b4bbcd67d8e012400a3e8811ba"
Commit ad1b1ad1 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Fixed PmeDirectElectrostatic energy

parent 1224ac17
...@@ -3308,7 +3308,7 @@ static unsigned int targetAtoms[2] = { 0, 1}; ...@@ -3308,7 +3308,7 @@ static unsigned int targetAtoms[2] = { 0, 1};
ps_M_ScaleIndices->_pSysStream[0][scaleOffset].y |= (1 << kk); ps_M_ScaleIndices->_pSysStream[0][scaleOffset].y |= (1 << kk);
} }
if( amoebaGpu->log && ( (atomI == targetAtoms[0]) || (atomI == targetAtoms[1]) ) ){ if( 0 && amoebaGpu->log && ( (atomI == targetAtoms[0]) || (atomI == targetAtoms[1]) ) ){
(void) fprintf( amoebaGpu->log, "XXX cell=%u [%u %u] [%d %d] p[%d %d] m[%d %d] scaleOffset=%d kk=%d\n", (void) fprintf( amoebaGpu->log, "XXX cell=%u [%u %u] [%d %d] p[%d %d] m[%d %d] scaleOffset=%d kk=%d\n",
ii, atomI, atomJ, covalentDegree, polarizationDegree, pX, pY, mX, mY, scaleOffset, kk ); ii, atomI, atomJ, covalentDegree, polarizationDegree, pX, pY, mX, mY, scaleOffset, kk );
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "amoebaCudaKernels.h" #include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h" #include "kCalculateAmoebaCudaUtilities.h"
#define AMOEBA_DEBUG //#define AMOEBA_DEBUG
static __constant__ cudaGmxSimulation cSim; static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim; static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
...@@ -74,8 +74,31 @@ struct PmeDirectElectrostaticParticle { ...@@ -74,8 +74,31 @@ struct PmeDirectElectrostaticParticle {
}; };
// self-energy for PME
/*
__device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
float4* debugArray, float4* pullBack )
{
unsigned int index = atomI + atomJ*cAmoebaSim.paddedNumberOfAtoms;
float blockId = 111.0f;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) atomJ;
debugArray[index].z = 0.0f;
debugArray[index].w = blockId;
for( int pullIndex = 0; pullIndex < 1; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
}
}
*/
// self-energy for PME
/*
__device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, float* energy) __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, float* energy)
{ {
float term = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float term = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
...@@ -99,7 +122,7 @@ __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDir ...@@ -99,7 +122,7 @@ __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDir
*energy = (cii + term*(dii/3.0f + 2.0f*term*qii/5.0f)); *energy = (cii + term*(dii/3.0f + 2.0f*term*qii/5.0f));
*energy += term*uii/3.0f; *energy += term*uii/3.0f;
*energy *= fterm; *energy *= fterm;
} } */
// self-torque for PME // self-torque for PME
...@@ -517,10 +540,8 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros ...@@ -517,10 +540,8 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
+ rr7*gli[3]*psc7); + rr7*gli[3]*psc7);
e = e - (1.0f-scalingFactors[MScaleIndex])*erl; e = e - (1.0f-scalingFactors[MScaleIndex])*erl;
ei = ei - erli; ei = ei - erli;
e = conversionFactor * e;
ei = conversionFactor * ei; *energy = conversionFactor*(e + ei);
// em = em + e;
// ep = ep + ei;
// increment the total intramolecular energy; assumes; // increment the total intramolecular energy; assumes;
// intramolecular distances are less than half of cell; // intramolecular distances are less than half of cell;
...@@ -915,17 +936,17 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros ...@@ -915,17 +936,17 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
debugIndex++; debugIndex++;
*/ */
idTracker += 1.0; idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm2i[1]; debugArray[debugIndex].x = e;
debugArray[debugIndex].y = conversionFactor*ftm2i[2]; debugArray[debugIndex].y = ei;
debugArray[debugIndex].z = conversionFactor*ftm2i[3]; debugArray[debugIndex].z = erl;
debugArray[debugIndex].w = r2; debugArray[debugIndex].w = erli;
debugIndex++; debugIndex++;
idTracker += 1.0; idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*fridmp[1]; debugArray[debugIndex].x = r2;
debugArray[debugIndex].y = conversionFactor*fridmp[2]; debugArray[debugIndex].y = cSim.alphaEwald;
debugArray[debugIndex].z = conversionFactor*fridmp[3]; debugArray[debugIndex].z = conversionFactor*fridmp[3];
debugArray[debugIndex].w = cSim.alphaEwald; debugArray[debugIndex].w = 115.0;
debugIndex++; debugIndex++;
idTracker += 1.0; idTracker += 1.0;
...@@ -963,12 +984,14 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros ...@@ -963,12 +984,14 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
outputTorque[1][1] = 0.0f; outputTorque[1][1] = 0.0f;
outputTorque[1][2] = 0.0f; outputTorque[1][2] = 0.0f;
*energy = 0.0f;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
for( int ii = 0; ii < 20; ii++ ){ for( int ii = 0; ii < 5; ii++ ){
debugArray[ii].x = 0.0f; debugArray[ii].x = 0.0f;
debugArray[ii].y = 0.0f; debugArray[ii].y = 0.0f;
debugArray[ii].z = 0.0f; debugArray[ii].z = 0.0f;
debugArray[ii].w = (float) ii; debugArray[ii].w = (float) (11*ii);
} }
#endif #endif
...@@ -1091,7 +1114,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1091,7 +1114,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
amoebaGpu->maxCovalentDegreeSz, amoebaGpu->pGamma, amoebaGpu->maxCovalentDegreeSz, amoebaGpu->pGamma,
amoebaGpu->scalingDistanceCutoff ); amoebaGpu->scalingDistanceCutoff );
} }
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray"); CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms); memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload(); debugArray->Upload();
...@@ -1218,6 +1241,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1218,6 +1241,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
ii = gpu->natoms - maxPrint; ii = gpu->natoms - maxPrint;
} }
} }
(void) fflush( amoebaGpu->log );
gpu->psEnergy->Download(); gpu->psEnergy->Download();
double energy = 0.0; double energy = 0.0;
for( unsigned int ii = 0; ii < gpu->sim.energyOutputBuffers; ii++ ){ for( unsigned int ii = 0; ii < gpu->sim.energyOutputBuffers; ii++ ){
...@@ -1229,12 +1253,30 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1229,12 +1253,30 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
} }
(void) fprintf( amoebaGpu->log,"Force sums: [%16.9e %16.9e %16.9e] Energy=%16.9e\n", forceSum[0], forceSum[1], forceSum[2], energy ); (void) fprintf( amoebaGpu->log,"Force sums: [%16.9e %16.9e %16.9e] Energy=%16.9e\n", forceSum[0], forceSum[1], forceSum[2], energy );
if( 0 ){
(void) fprintf( amoebaGpu->log,"DebugElecAll\n" );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
for( int jj = 0; jj < gpu->natoms*gpu->natoms; jj++ ){
if( fabs( debugArray->_pSysStream[0][jj].w - 111.0 ) < 1.0e-04 ){
int debugIndex = jj;
(void) fprintf( amoebaGpu->log,"%8d [%16.9e %16.9e %16.9e %16.9e] Enr11\n", jj,
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
debugIndex += paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"%8d [%16.9e %16.9e %16.9e %16.9e] Enr12\n", jj,
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
}
}
}
(void) fprintf( amoebaGpu->log,"\n" );
if( 1 ){ if( 1 ){
(void) fprintf( amoebaGpu->log,"DebugElec\n" ); (void) fprintf( amoebaGpu->log,"DebugElec\n" );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
for( int jj = 0; jj < gpu->natoms; jj++ ){ for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj; int debugIndex = jj;
for( int kk = 0; kk < 15; kk++ ){ for( int kk = 0; kk < 6; kk++ ){
(void) fprintf( amoebaGpu->log,"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11\n", targetAtom, jj, (void) fprintf( amoebaGpu->log,"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11\n", targetAtom, jj,
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y, debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w ); debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
......
...@@ -43,8 +43,8 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)( ...@@ -43,8 +43,8 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
){ ){
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
int pullIndexMax = 12; int maxPullIndex = 2;
float4 pullBack[20]; float4 pullBack[12];
#endif #endif
extern __shared__ PmeDirectElectrostaticParticle sA[]; extern __shared__ PmeDirectElectrostaticParticle sA[];
...@@ -185,6 +185,12 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)( ...@@ -185,6 +185,12 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
totalEnergy += mask ? 0.5*energy : 0.0f; totalEnergy += mask ? 0.5*energy : 0.0f;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
/*
energy = mask ? 0.5*energy : 0.0f;
if( atomI < 200 && (fabs( energy ) > 1.0e+8 || energy != energy) ){
debugSetup( atomI, atomJ, debugArray, pullBack );
} */
if( atomI == targetAtom ){ if( atomI == targetAtom ){
unsigned int index = (atomI == targetAtom) ? atomJ : atomI; unsigned int index = (atomI == targetAtom) ? atomJ : atomI;
float blockId = 1.0f; float blockId = 1.0f;
...@@ -213,7 +219,7 @@ if( atomI == targetAtom ){ ...@@ -213,7 +219,7 @@ if( atomI == targetAtom ){
debugArray[index].z = mask ? torque[0][2] : 0.0f; debugArray[index].z = mask ? torque[0][2] : 0.0f;
debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x); debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x);
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){ for( int pullIndex = 0; pullIndex < maxPullIndex; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullIndex].x; debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y; debugArray[index].y = pullBack[pullIndex].y;
...@@ -340,6 +346,11 @@ if( atomI == targetAtom ){ ...@@ -340,6 +346,11 @@ if( atomI == targetAtom ){
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
/*
energy = mask ? 0.5*energy : 0.0f;
if( atomI < 200 && (fabs( energy ) > 1.0e+8 || energy != energy) ){
debugSetup( atomI, atomJ, debugArray, pullBack );
} */
if( atomI == targetAtom || atomJ == targetAtom ){ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned int index = (atomI == targetAtom) ? atomJ : atomI; unsigned int index = (atomI == targetAtom) ? atomJ : atomI;
...@@ -372,7 +383,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){ ...@@ -372,7 +383,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f; debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x); debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x);
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){ for( int pullIndex = 0; pullIndex < maxPullIndex; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullIndex].x; debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y; debugArray[index].y = pullBack[pullIndex].y;
...@@ -449,6 +460,11 @@ if( atomI == targetAtom || atomJ == targetAtom ){ ...@@ -449,6 +460,11 @@ if( atomI == targetAtom || atomJ == targetAtom ){
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
/*
energy = mask ? 0.5*energy : 0.0f;
if( atomI < 200 && (fabs( energy ) > 1.0e+8 || energy != energy) ){
debugSetup( atomI, atomJ, debugArray, pullBack );
} */
if( atomI == targetAtom || atomJ == targetAtom ){ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned int index = (atomI == targetAtom) ? atomJ : atomI; unsigned int index = (atomI == targetAtom) ? atomJ : atomI;
unsigned int indexI = (atomI == targetAtom) ? 0 : 1; unsigned int indexI = (atomI == targetAtom) ? 0 : 1;
...@@ -479,7 +495,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){ ...@@ -479,7 +495,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f; debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x); debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x);
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){ for( int pullIndex = 0; pullIndex < maxPullIndex; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullIndex].x; debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y; debugArray[index].y = pullBack[pullIndex].y;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment