"...git@developer.sourcefind.cn:2222/OpenDAS/flashmla.git" did not exist on "2d291b0c31050ba259e87a4ae6fbf75a47824716"
Commit c22f00cf authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

AmoebaCudaPmeMutualInducedField

parent 3763b76b
...@@ -2763,6 +2763,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu) ...@@ -2763,6 +2763,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaVdw14_7Sim( amoebaGpu ); SetCalculateAmoebaCudaVdw14_7Sim( amoebaGpu );
SetCalculateAmoebaCudaWcaDispersionSim( amoebaGpu ); SetCalculateAmoebaCudaWcaDispersionSim( amoebaGpu );
SetCalculateAmoebaCudaMutualInducedFieldSim( amoebaGpu ); SetCalculateAmoebaCudaMutualInducedFieldSim( amoebaGpu );
SetCalculateAmoebaCudaPmeMutualInducedFieldSim( amoebaGpu );
SetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpu ); SetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpu );
SetCalculateAmoebaElectrostaticSim( amoebaGpu ); SetCalculateAmoebaElectrostaticSim( amoebaGpu );
SetCalculateAmoebaRealSpaceEwaldSim( amoebaGpu ); SetCalculateAmoebaRealSpaceEwaldSim( amoebaGpu );
......
...@@ -88,6 +88,10 @@ extern void SetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu); ...@@ -88,6 +88,10 @@ extern void SetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu); extern void GetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaMutualInducedField( amoebaGpuContext gpu); extern void cudaComputeAmoebaMutualInducedField( amoebaGpuContext gpu);
extern void SetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaPmeMutualInducedField( amoebaGpuContext gpu);
// mutual induced and Gk // mutual induced and Gk
extern void SetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGpu); extern void SetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGpu);
......
...@@ -36,14 +36,11 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp ...@@ -36,14 +36,11 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp
//#define AMOEBA_DEBUG //#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG #undef AMOEBA_DEBUG
__device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoordinatesI, float4 atomCoordinatesJ, #define GK
float dampingFactorI, float dampingFactorJ, #include "kCalculateAmoebaCudaMutualInducedParticle.h"
float tholeI, float tholeJ, #undef GK
float* inducedDipoleI, float* inducedDipolePolarI,
float* inducedDipoleJ, float* inducedDipolePolarJ, __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( MutualInducedParticle& atomI, MutualInducedParticle& atomJ,
float* inducedDipoleSI, float* inducedDipolePolarSI,
float* inducedDipoleSJ, float* inducedDipolePolarSJ,
float scalingDistanceCutoff,
float fields[8][3] float fields[8][3]
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -59,9 +56,9 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor ...@@ -59,9 +56,9 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
// get deltaR, and r between 2 atoms // get deltaR, and r between 2 atoms
deltaR[0] = atomCoordinatesJ.x - atomCoordinatesI.x; deltaR[0] = atomJ.x - atomI.x;
deltaR[1] = atomCoordinatesJ.y - atomCoordinatesI.y; deltaR[1] = atomJ.y - atomI.y;
deltaR[2] = atomCoordinatesJ.z - atomCoordinatesI.z; deltaR[2] = atomJ.z - atomI.z;
float r = sqrtf( deltaR[0]*deltaR[0] + deltaR[1]*deltaR[1] + deltaR[2]*deltaR[2] ); float r = sqrtf( deltaR[0]*deltaR[0] + deltaR[1]*deltaR[1] + deltaR[2]*deltaR[2] );
float rI = 1.0f/r; float rI = 1.0f/r;
...@@ -69,112 +66,59 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor ...@@ -69,112 +66,59 @@ __device__ void calculateMutualInducedAndGkFieldsPairIxn_kernel( float4 atomCoor
float rr3 = -rI*r2I; float rr3 = -rI*r2I;
float rr5 = -3.0f*rr3*r2I; float rr5 = -3.0f*rr3*r2I;
float dampProd = dampingFactorI*dampingFactorJ; float dampProd = atomI.damp*atomJ.damp;
float ratio = (dampProd != 0.0f) ? (r/dampProd) : 1.0f; float ratio = (dampProd != 0.0f) ? (r/dampProd) : 1.0f;
float pGamma = tholeJ > tholeI ? tholeI : tholeJ; float pGamma = atomI.thole > atomJ.thole ? atomJ.thole: atomI.thole;
float damp = ratio*ratio*ratio*pGamma; float damp = ratio*ratio*ratio*pGamma;
float dampExp = ( (dampProd != 0.0f) && (r < scalingDistanceCutoff) ) ? expf( -damp ) : 0.0f; float dampExp = ( (dampProd != 0.0f) && (r < cAmoebaSim.scalingDistanceCutoff) ) ? expf( -damp ) : 0.0f;
rr3 *= (1.0f - dampExp); rr3 *= (1.0f - dampExp);
rr5 *= (1.0f - ( 1.0f + damp )*dampExp); rr5 *= (1.0f - ( 1.0f + damp )*dampExp);
float* dipole = inducedDipoleJ; float dDotDelta = rr5*(deltaR[0]*atomJ.inducedDipole[0] + deltaR[1]*atomJ.inducedDipole[1] + deltaR[2]*atomJ.inducedDipole[2] );
float dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[0][0] = rr3*atomJ.inducedDipole[0] + dDotDelta*deltaR[0];
fields[0][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[0][1] = rr3*atomJ.inducedDipole[1] + dDotDelta*deltaR[1];
fields[0][1] = rr3*dipole[1] + dDotDelta*deltaR[1]; fields[0][2] = rr3*atomJ.inducedDipole[2] + dDotDelta*deltaR[2];
fields[0][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
dipole = inducedDipolePolarJ; dDotDelta = rr5*(deltaR[0]*atomJ.inducedDipolePolar[0] + deltaR[1]*atomJ.inducedDipolePolar[1] + deltaR[2]*atomJ.inducedDipolePolar[2] );
dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[1][0] = rr3*atomJ.inducedDipolePolar[0] + dDotDelta*deltaR[0];
fields[1][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[1][1] = rr3*atomJ.inducedDipolePolar[1] + dDotDelta*deltaR[1];
fields[1][1] = rr3*dipole[1] + dDotDelta*deltaR[1]; fields[1][2] = rr3*atomJ.inducedDipolePolar[2] + dDotDelta*deltaR[2];
fields[1][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
dipole = inducedDipoleI; dDotDelta = rr5*(deltaR[0]*atomI.inducedDipole[0] + deltaR[1]*atomI.inducedDipole[1] + deltaR[2]*atomI.inducedDipole[2] );
dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[2][0] = rr3*atomI.inducedDipole[0] + dDotDelta*deltaR[0];
fields[2][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[2][1] = rr3*atomI.inducedDipole[1] + dDotDelta*deltaR[1];
fields[2][1] = rr3*dipole[1] + dDotDelta*deltaR[1]; fields[2][2] = rr3*atomI.inducedDipole[2] + dDotDelta*deltaR[2];
fields[2][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
dipole = inducedDipolePolarI; dDotDelta = rr5*(deltaR[0]*atomI.inducedDipolePolar[0] + deltaR[1]*atomI.inducedDipolePolar[1] + deltaR[2]*atomI.inducedDipolePolar[2] );
dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[3][0] = rr3*atomI.inducedDipolePolar[0] + dDotDelta*deltaR[0];
fields[3][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[3][1] = rr3*atomI.inducedDipolePolar[1] + dDotDelta*deltaR[1];
fields[3][1] = rr3*dipole[1] + dDotDelta*deltaR[1]; fields[3][2] = rr3*atomI.inducedDipolePolar[2] + dDotDelta*deltaR[2];
fields[3][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
dDotDelta = rr5*(deltaR[0]*atomJ.inducedDipoleS[0] + deltaR[1]*atomJ.inducedDipoleS[1] + deltaR[2]*atomJ.inducedDipoleS[2] );
dipole = inducedDipoleSJ; fields[4][0] = rr3*atomJ.inducedDipoleS[0] + dDotDelta*deltaR[0];
dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[4][1] = rr3*atomJ.inducedDipoleS[1] + dDotDelta*deltaR[1];
fields[4][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[4][2] = rr3*atomJ.inducedDipoleS[2] + dDotDelta*deltaR[2];
fields[4][1] = rr3*dipole[1] + dDotDelta*deltaR[1];
fields[4][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
dipole = inducedDipolePolarSJ; dDotDelta = rr5*(deltaR[0]*atomJ.inducedDipolePolarS[0] + deltaR[1]*atomJ.inducedDipolePolarS[1] + deltaR[2]*atomJ.inducedDipolePolarS[2] );
dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[5][0] = rr3*atomJ.inducedDipolePolarS[0] + dDotDelta*deltaR[0];
fields[5][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[5][1] = rr3*atomJ.inducedDipolePolarS[1] + dDotDelta*deltaR[1];
fields[5][1] = rr3*dipole[1] + dDotDelta*deltaR[1]; fields[5][2] = rr3*atomJ.inducedDipolePolarS[2] + dDotDelta*deltaR[2];
fields[5][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
dipole = inducedDipoleSI; dDotDelta = rr5*(deltaR[0]*atomI.inducedDipoleS[0] + deltaR[1]*atomI.inducedDipoleS[1] + deltaR[2]*atomI.inducedDipoleS[2] );
dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[6][0] = rr3*atomI.inducedDipoleS[0] + dDotDelta*deltaR[0];
fields[6][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[6][1] = rr3*atomI.inducedDipoleS[1] + dDotDelta*deltaR[1];
fields[6][1] = rr3*dipole[1] + dDotDelta*deltaR[1]; fields[6][2] = rr3*atomI.inducedDipoleS[2] + dDotDelta*deltaR[2];
fields[6][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
dipole = inducedDipolePolarSI; dDotDelta = rr5*(deltaR[0]*atomI.inducedDipolePolarS[0] + deltaR[1]*atomI.inducedDipolePolarS[1] + deltaR[2]*atomI.inducedDipolePolarS[2] );
dDotDelta = rr5*(deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] ); fields[7][0] = rr3*atomI.inducedDipolePolarS[0] + dDotDelta*deltaR[0];
fields[7][0] = rr3*dipole[0] + dDotDelta*deltaR[0]; fields[7][1] = rr3*atomI.inducedDipolePolarS[1] + dDotDelta*deltaR[1];
fields[7][1] = rr3*dipole[1] + dDotDelta*deltaR[1]; fields[7][2] = rr3*atomI.inducedDipolePolarS[2] + dDotDelta*deltaR[2];
fields[7][2] = rr3*dipole[2] + dDotDelta*deltaR[2];
#ifdef AMOEBA_DEBUGX
//int n2 = numberOfAtoms*numberOfAtoms;
//int debugIndex = atomI*numberOfAtoms + atomJ;
if( atomI == 0 ){
int debugIndex = atomJ;
debugArray[debugIndex].x = rr3;
debugArray[debugIndex].y = rr5;
dipole = &inducedDipole[atomJ*3];
debugArray[debugIndex].z = (deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] );
dipole = &inducedDipole[atomI*3];
debugArray[debugIndex].w = (deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] );
debugIndex += numberOfAtoms;
dipole = &inducedDipolePolar[atomJ*3];
debugArray[debugIndex].x = (deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] );
dipole = &inducedDipolePolar[atomI*3];
debugArray[debugIndex].y = (deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2] );
debugIndex += numberOfAtoms;
debugArray[debugIndex].x = fields1[atomI*numberOfAtoms*3 + atomJ*3 +0];
debugArray[debugIndex].y = fields1[atomI*numberOfAtoms*3 + atomJ*3 +1];
debugArray[debugIndex].z = fields1[atomI*numberOfAtoms*3 + atomJ*3 +2];
debugIndex += numberOfAtoms;
debugArray[debugIndex].x = fields1[atomJ*numberOfAtoms*3 + atomI*3 +0];
debugArray[debugIndex].y = fields1[atomJ*numberOfAtoms*3 + atomI*3 +1];
debugArray[debugIndex].z = fields1[atomJ*numberOfAtoms*3 + atomI*3 +2];
debugIndex += numberOfAtoms;
debugArray[debugIndex].x = fields2[atomI*numberOfAtoms*3 + atomJ*3 +0];
debugArray[debugIndex].y = fields2[atomI*numberOfAtoms*3 + atomJ*3 +1];
debugArray[debugIndex].z = fields2[atomI*numberOfAtoms*3 + atomJ*3 +2];
debugIndex += numberOfAtoms;
debugArray[debugIndex].x = fields2[atomJ*numberOfAtoms*3 + atomI*3 +0];
debugArray[debugIndex].y = fields2[atomJ*numberOfAtoms*3 + atomI*3 +1];
debugArray[debugIndex].z = fields2[atomJ*numberOfAtoms*3 + atomI*3 +2];
}
#endif
} }
__device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCoordinatesI, float4 atomCoordinatesJ, float rb2, __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( MutualInducedParticle& atomI, MutualInducedParticle& atomJ,
float* inducedDipoleSI, float* inducedDipolePolarSI,
float* inducedDipoleSJ, float* inducedDipolePolarSJ,
float gkField[8][3] float gkField[8][3]
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -191,14 +135,16 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo ...@@ -191,14 +135,16 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
float xr = atomCoordinatesJ.x - atomCoordinatesI.x; float xr = atomJ.x - atomI.x;
float yr = atomCoordinatesJ.y - atomCoordinatesI.y; float yr = atomJ.y - atomI.y;
float zr = atomCoordinatesJ.z - atomCoordinatesI.z; float zr = atomJ.z - atomI.z;
float xr2 = xr*xr; float xr2 = xr*xr;
float yr2 = yr*yr; float yr2 = yr*yr;
float zr2 = zr*zr; float zr2 = zr*zr;
float rb2 = atomI.bornRadius*atomJ.bornRadius;
float r2 = xr2 + yr2 + zr2; float r2 = xr2 + yr2 + zr2;
float expterm = expf(-r2/(cAmoebaSim.gkc*rb2)); float expterm = expf(-r2/(cAmoebaSim.gkc*rb2));
float expc = expterm /cAmoebaSim.gkc; float expc = expterm /cAmoebaSim.gkc;
...@@ -209,21 +155,21 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo ...@@ -209,21 +155,21 @@ __device__ void calculateMutualInducedAndGkFieldsGkPairIxn_kernel( float4 atomCo
float gf3 = gf2 * gf; float gf3 = gf2 * gf;
float gf5 = gf3 * gf2; float gf5 = gf3 * gf2;
float duixs = inducedDipoleSI[0]; float duixs = atomI.inducedDipoleS[0];
float duiys = inducedDipoleSI[1]; float duiys = atomI.inducedDipoleS[1];
float duizs = inducedDipoleSI[2]; float duizs = atomI.inducedDipoleS[2];
float puixs = inducedDipolePolarSI[0]; float puixs = atomI.inducedDipolePolarS[0];
float puiys = inducedDipolePolarSI[1]; float puiys = atomI.inducedDipolePolarS[1];
float puizs = inducedDipolePolarSI[2]; float puizs = atomI.inducedDipolePolarS[2];
float dukxs = inducedDipoleSJ[0]; float dukxs = atomJ.inducedDipoleS[0];
float dukys = inducedDipoleSJ[1]; float dukys = atomJ.inducedDipoleS[1];
float dukzs = inducedDipoleSJ[2]; float dukzs = atomJ.inducedDipoleS[2];
float pukxs = inducedDipolePolarSJ[0]; float pukxs = atomJ.inducedDipolePolarS[0];
float pukys = inducedDipolePolarSJ[1]; float pukys = atomJ.inducedDipolePolarS[1];
float pukzs = inducedDipolePolarSJ[2]; float pukzs = atomJ.inducedDipolePolarS[2];
// reaction potential auxiliary terms // reaction potential auxiliary terms
...@@ -280,9 +226,6 @@ __device__ static int debugAccumulate( int index, float4* debugArray, float* fie ...@@ -280,9 +226,6 @@ __device__ static int debugAccumulate( int index, float4* debugArray, float* fie
} }
#endif #endif
#define GK
#include "kCalculateAmoebaCudaMutualInducedParticle.h"
#undef GK
// Include versions of the kernels for N^2 calculations. // Include versions of the kernels for N^2 calculations.
...@@ -587,24 +530,16 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon ...@@ -587,24 +530,16 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
} }
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
#if 0 kCalculateAmoebaMutualInducedAndGkFieldsN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
(void) fprintf( amoebaGpu->log, "N2 warp\n" ); (void) fflush( amoebaGpu->log );
kCalculateAmoebaMutualInducedAndGkFieldsN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks,
amoebaGpu->nonbondThreadsPerBlock,
sizeof(MutualInducedParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0], amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psInducedDipole->_pDevStream[0],
amoebaGpu->psInducedDipolePolar->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0], amoebaGpu->psWorkArray_3_2->_pDevStream[0],
amoebaGpu->psWorkArray_3_3->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_4->_pDevStream[0],
debugArray->_pDevStream[0], targetAtom ); debugArray->_pDevStream[0], targetAtom );
#else #else
amoebaGpu->psWorkArray_3_2->_pDevStream[0] ); amoebaGpu->psWorkArray_3_4->_pDevStream[0] );
#endif
#endif #endif
} else { } else {
...@@ -619,12 +554,6 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon ...@@ -619,12 +554,6 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
kCalculateAmoebaMutualInducedAndGkFieldsN2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>( kCalculateAmoebaMutualInducedAndGkFieldsN2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0], amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
gpu->psBornRadii->_pDevStream[0],
amoebaGpu->psInducedDipole->_pDevStream[0],
amoebaGpu->psInducedDipolePolar->_pDevStream[0],
amoebaGpu->psInducedDipoleS->_pDevStream[0],
amoebaGpu->psInducedDipolePolarS->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
amoebaGpu->psWorkArray_3_2->_pDevStream[0], amoebaGpu->psWorkArray_3_2->_pDevStream[0],
amoebaGpu->psWorkArray_3_3->_pDevStream[0], amoebaGpu->psWorkArray_3_3->_pDevStream[0],
......
...@@ -36,12 +36,6 @@ __launch_bounds__(64, 1) ...@@ -36,12 +36,6 @@ __launch_bounds__(64, 1)
#endif #endif
void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)( void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
unsigned int* workUnit, unsigned int* workUnit,
float4* atomCoord,
float* bornRadii,
float* inducedDipole,
float* inducedDipolePolar,
float* inducedDipoleS,
float* inducedDipolePolarS,
float* outputField, float* outputField,
float* outputFieldPolar, float* outputFieldPolar,
float* outputFieldS, float* outputFieldS,
...@@ -60,13 +54,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)( ...@@ -60,13 +54,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
float4 jCoord;
float jBornRadius;
float jDipole[3];
float jDipolePolar[3];
float jDipoleS[3];
float jDipolePolarS[3];
while (pos < end) while (pos < end)
{ {
...@@ -84,7 +71,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)( ...@@ -84,7 +71,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
MutualInducedParticle* psA = &sA[tbx]; MutualInducedParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx; unsigned int atomI = x + tgx;
float4 iCoord = atomCoord[atomI]; MutualInducedParticle localParticle;
loadMutualInducedShared( &localParticle, atomI );
float fieldSum[3]; float fieldSum[3];
float fieldPolarSum[3]; float fieldPolarSum[3];
...@@ -115,9 +103,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)( ...@@ -115,9 +103,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
// load shared data // load shared data
loadMutualInducedShared( &(sA[threadIdx.x]), atomI, loadMutualInducedShared( &(sA[threadIdx.x]), atomI );
atomCoord, inducedDipole, inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole,
bornRadii, inducedDipoleS, inducedDipolePolarS );
for (unsigned int j = 0; j < GRID; j++) for (unsigned int j = 0; j < GRID; j++)
{ {
...@@ -126,16 +112,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)( ...@@ -126,16 +112,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedAndGkFields, _kernel)(
// load coords, charge, ... // load coords, charge, ...
loadMutualInducedData( &(psA[j]), &jCoord, jDipole, jDipolePolar, &jBornRadius, jDipoleS, jDipolePolarS ); calculateMutualInducedAndGkFieldsPairIxn_kernel( localParticle, psA[j], ijField
calculateMutualInducedAndGkFieldsPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[j].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[j].thole,
&(inducedDipole[atomI*3]), &(inducedDipolePolar[atomI*3]),
jDipole, jDipolePolar,
&(inducedDipoleS[atomI*3]), &(inducedDipolePolarS[atomI*3]),
jDipoleS, jDipolePolarS,
cAmoebaSim.scalingDistanceCutoff, ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, debugArray , debugArray
#endif #endif
...@@ -177,10 +154,7 @@ if( atomI == targetAtom ){ ...@@ -177,10 +154,7 @@ if( atomI == targetAtom ){
index = debugAccumulate( index, debugArray, ijField[5], mask, 4.0f ); index = debugAccumulate( index, debugArray, ijField[5], mask, 4.0f );
} }
#endif #endif
calculateMutualInducedAndGkFieldsGkPairIxn_kernel( iCoord, jCoord, bornRadii[atomI]*jBornRadius, calculateMutualInducedAndGkFieldsGkPairIxn_kernel( localParticle, psA[j], ijField
&(inducedDipoleS[atomI*3]), &(inducedDipolePolarS[atomI*3]),
jDipoleS, jDipolePolarS,
ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, debugArray , debugArray
#endif #endif
...@@ -247,9 +221,7 @@ if( atomI == targetAtom ){ ...@@ -247,9 +221,7 @@ if( atomI == targetAtom ){
{ {
// load coordinates, charge, ... // load coordinates, charge, ...
loadMutualInducedShared( &(sA[threadIdx.x]), (y+tgx), loadMutualInducedShared( &(sA[threadIdx.x]), (y+tgx) );
atomCoord, inducedDipole,
inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole, bornRadii, inducedDipoleS, inducedDipolePolarS );
} }
// zero shared fields // zero shared fields
...@@ -263,16 +235,7 @@ if( atomI == targetAtom ){ ...@@ -263,16 +235,7 @@ if( atomI == targetAtom ){
// load coords, charge, ... // load coords, charge, ...
loadMutualInducedData( &(psA[tj]), &jCoord, jDipole, jDipolePolar, &jBornRadius, jDipoleS, jDipolePolarS ); calculateMutualInducedAndGkFieldsPairIxn_kernel( localParticle, psA[tj], ijField
calculateMutualInducedAndGkFieldsPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[tj].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[tj].thole,
&(inducedDipole[atomI*3]), &(inducedDipolePolar[atomI*3]),
jDipole, jDipolePolar,
&(inducedDipoleS[atomI*3]), &(inducedDipolePolarS[atomI*3]),
jDipoleS, jDipolePolarS,
cAmoebaSim.scalingDistanceCutoff, ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, debugArray , debugArray
#endif #endif
...@@ -344,10 +307,7 @@ if( atomI == targetAtom || (y + tj) == targetAtom ){ ...@@ -344,10 +307,7 @@ if( atomI == targetAtom || (y + tj) == targetAtom ){
index = debugAccumulate( index, debugArray, ijField[indexI+5], maskD, -4.0f ); index = debugAccumulate( index, debugArray, ijField[indexI+5], maskD, -4.0f );
} }
#endif #endif
calculateMutualInducedAndGkFieldsGkPairIxn_kernel( iCoord, jCoord, bornRadii[atomI]*jBornRadius, calculateMutualInducedAndGkFieldsGkPairIxn_kernel( localParticle, psA[tj], ijField
&(inducedDipoleS[atomI*3]), &(inducedDipolePolarS[atomI*3]),
jDipoleS, jDipolePolarS,
ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, debugArray , debugArray
#endif #endif
......
...@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) ...@@ -36,11 +36,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif #endif
void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)( void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
unsigned int* workUnit, unsigned int* workUnit,
float4* atomCoord, float* outputField, float* outputFieldPolar
float* inducedDipole,
float* inducedDipolePolar,
float* outputField,
float* outputFieldPolar
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, float4* debugArray, unsigned int targetAtom , float4* debugArray, unsigned int targetAtom
#endif #endif
...@@ -55,10 +51,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)( ...@@ -55,10 +51,6 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
float4 jCoord;
float jDipole[3];
float jDipolePolar[3];
while (pos < end) while (pos < end)
{ {
...@@ -76,7 +68,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)( ...@@ -76,7 +68,8 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
MutualInducedParticle* psA = &sA[tbx]; MutualInducedParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx; unsigned int atomI = x + tgx;
float4 iCoord = atomCoord[atomI]; MutualInducedParticle localParticle;
loadMutualInducedShared( &localParticle, atomI );
float fieldSum[3]; float fieldSum[3];
float fieldPolarSum[3]; float fieldPolarSum[3];
...@@ -97,9 +90,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)( ...@@ -97,9 +90,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load shared data // load shared data
loadMutualInducedShared( &(sA[threadIdx.x]), atomI, loadMutualInducedShared( &(sA[threadIdx.x]), atomI );
atomCoord, inducedDipole,
inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );
for (unsigned int j = 0; j < GRID; j++) for (unsigned int j = 0; j < GRID; j++)
{ {
...@@ -108,14 +99,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)( ...@@ -108,14 +99,7 @@ void METHOD_NAME(kCalculateAmoebaMutualInducedField, _kernel)(
// load coords, charge, ... // load coords, charge, ...
loadMutualInducedData( &(psA[j]), &jCoord, jDipole, jDipolePolar ); calculateMutualInducedFieldPairIxn_kernel( localParticle, psA[j], ijField
calculateMutualInducedFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[j].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[j].thole,
&(inducedDipole[atomI*3]), &(inducedDipolePolar[atomI*3]),
jDipole, jDipolePolar,
cAmoebaSim.scalingDistanceCutoff, ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, debugArray , debugArray
#endif #endif
...@@ -187,9 +171,7 @@ if( atomI == targetAtom ){ ...@@ -187,9 +171,7 @@ if( atomI == targetAtom ){
// load coordinates, charge, ... // load coordinates, charge, ...
loadMutualInducedShared( &(sA[threadIdx.x]), atomJ, loadMutualInducedShared( &(sA[threadIdx.x]), atomJ );
atomCoord, inducedDipole,
inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );
} }
// zero shared fields // zero shared fields
...@@ -203,14 +185,7 @@ if( atomI == targetAtom ){ ...@@ -203,14 +185,7 @@ if( atomI == targetAtom ){
// load coords, charge, ... // load coords, charge, ...
loadMutualInducedData( &(psA[tj]), &jCoord, jDipole, jDipolePolar ); calculateMutualInducedFieldPairIxn_kernel( localParticle, psA[tj], ijField
calculateMutualInducedFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[tj].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[tj].thole,
&(inducedDipole[atomI*3]), &(inducedDipolePolar[atomI*3]),
jDipole, jDipolePolar,
cAmoebaSim.scalingDistanceCutoff, ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, debugArray , debugArray
#endif #endif
......
...@@ -26,58 +26,50 @@ struct MutualInducedParticle { ...@@ -26,58 +26,50 @@ struct MutualInducedParticle {
#endif #endif
}; };
__device__ void loadMutualInducedShared( struct MutualInducedParticle* sA, unsigned int atomI, __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsigned int atomI )
float4* atomCoord,
float* inducedDipole, float* inducedDipolePolar,
float2* dampingFactorAndThole
#ifdef GK
, float* bornRadii, float* inducedDipoleS, float* inducedDipolePolarS
#endif
)
{ {
// coordinates & charge // coordinates & charge
sA->x = atomCoord[atomI].x; sA->x = cSim.pPosq[atomI].x;
sA->y = atomCoord[atomI].y; sA->y = cSim.pPosq[atomI].y;
sA->z = atomCoord[atomI].z; sA->z = cSim.pPosq[atomI].z;
sA->q = atomCoord[atomI].w; sA->q = cSim.pPosq[atomI].w;
// dipole // dipole
sA->inducedDipole[0] = inducedDipole[atomI*3]; sA->inducedDipole[0] = cAmoebaSim.pInducedDipole[atomI*3];
sA->inducedDipole[1] = inducedDipole[atomI*3+1]; sA->inducedDipole[1] = cAmoebaSim.pInducedDipole[atomI*3+1];
sA->inducedDipole[2] = inducedDipole[atomI*3+2]; sA->inducedDipole[2] = cAmoebaSim.pInducedDipole[atomI*3+2];
// dipole polar // dipole polar
sA->inducedDipolePolar[0] = inducedDipolePolar[atomI*3]; sA->inducedDipolePolar[0] = cAmoebaSim.pInducedDipolePolar[atomI*3];
sA->inducedDipolePolar[1] = inducedDipolePolar[atomI*3+1]; sA->inducedDipolePolar[1] = cAmoebaSim.pInducedDipolePolar[atomI*3+1];
sA->inducedDipolePolar[2] = inducedDipolePolar[atomI*3+2]; sA->inducedDipolePolar[2] = cAmoebaSim.pInducedDipolePolar[atomI*3+2];
sA->damp = dampingFactorAndThole[atomI].x; sA->damp = cAmoebaSim.pDampingFactorAndThole[atomI].x;
sA->thole = dampingFactorAndThole[atomI].y; sA->thole = cAmoebaSim.pDampingFactorAndThole[atomI].y;
#ifdef GK #ifdef GK
sA->bornRadius = bornRadii[atomI]; sA->bornRadius = cSim.pBornRadii[atomI];
// dipoleS // dipoleS
sA->inducedDipoleS[0] = inducedDipoleS[atomI*3]; sA->inducedDipoleS[0] = cAmoebaSim.pInducedDipoleS[atomI*3];
sA->inducedDipoleS[1] = inducedDipoleS[atomI*3+1]; sA->inducedDipoleS[1] = cAmoebaSim.pInducedDipoleS[atomI*3+1];
sA->inducedDipoleS[2] = inducedDipoleS[atomI*3+2]; sA->inducedDipoleS[2] = cAmoebaSim.pInducedDipoleS[atomI*3+2];
// dipole polar S // dipole polar S
sA->inducedDipolePolarS[0] = inducedDipolePolarS[atomI*3]; sA->inducedDipolePolarS[0] = cAmoebaSim.pInducedDipolePolarS[atomI*3];
sA->inducedDipolePolarS[1] = inducedDipolePolarS[atomI*3+1]; sA->inducedDipolePolarS[1] = cAmoebaSim.pInducedDipolePolarS[atomI*3+1];
sA->inducedDipolePolarS[2] = inducedDipolePolarS[atomI*3+2]; sA->inducedDipolePolarS[2] = cAmoebaSim.pInducedDipolePolarS[atomI*3+2];
#endif #endif
} }
__device__ static void zeroMutualInducedParticleSharedField( struct MutualInducedParticle* sA ) __device__ static void zeroMutualInducedParticleSharedField( MutualInducedParticle* sA )
{ {
// zero shared fields // zero shared fields
...@@ -101,44 +93,3 @@ __device__ static void zeroMutualInducedParticleSharedField( struct MutualInduce ...@@ -101,44 +93,3 @@ __device__ static void zeroMutualInducedParticleSharedField( struct MutualInduce
#endif #endif
} }
// load struct and arrays w/ shared data in sA
__device__ void loadMutualInducedData( struct MutualInducedParticle* sA, float4* jCoord,
float* jInducedDipole, float* jInducedDipolePolar
#ifdef GK
, float* jBornRadius, float* jInducedDipoleS, float* jInducedDipolePolarS
#endif
)
{
// load coords, charge, ...
jCoord->x = sA->x;
jCoord->y = sA->y;
jCoord->z = sA->z;
jCoord->w = sA->q;
jInducedDipole[0] = sA->inducedDipole[0];
jInducedDipole[1] = sA->inducedDipole[1];
jInducedDipole[2] = sA->inducedDipole[2];
jInducedDipolePolar[0] = sA->inducedDipolePolar[0];
jInducedDipolePolar[1] = sA->inducedDipolePolar[1];
jInducedDipolePolar[2] = sA->inducedDipolePolar[2];
#ifdef GK
*jBornRadius = sA->bornRadius;
jInducedDipoleS[0] = sA->inducedDipoleS[0];
jInducedDipoleS[1] = sA->inducedDipoleS[1];
jInducedDipoleS[2] = sA->inducedDipoleS[2];
jInducedDipolePolarS[0] = sA->inducedDipolePolarS[0];
jInducedDipolePolarS[1] = sA->inducedDipolePolarS[1];
jInducedDipolePolarS[2] = sA->inducedDipolePolarS[2];
#endif
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
unsigned int* workUnit,
float* outputField, float* outputFieldPolar
#ifdef AMOEBA_DEBUG
, float4* debugArray, unsigned int targetAtom
#endif
){
extern __shared__ MutualInducedParticle sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
const float uscale = 1.0f;
#ifdef AMOEBA_DEBUG
float4 pullBack[4];
#endif
while (pos < end)
{
unsigned int x;
unsigned int y;
bool bExclusionFlag;
// Extract cell coordinates
decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
MutualInducedParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
MutualInducedParticle localParticle;
loadMutualInducedShared( &localParticle, atomI );
float fieldSum[3];
float fieldPolarSum[3];
// 0: field at i due to j
// 1: field at i due to j polar
fieldSum[0] = 0.0f;
fieldSum[1] = 0.0f;
fieldSum[2] = 0.0f;
fieldPolarSum[0] = 0.0f;
fieldPolarSum[1] = 0.0f;
fieldPolarSum[2] = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// load shared data
loadMutualInducedShared( &(sA[threadIdx.x]), atomI );
for (unsigned int j = 0; j < GRID; j++)
{
float ijField[4][3];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel( localParticle, psA[j], uscale, ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
);
unsigned int mask = ( (atomI == (y + j)) || (atomI >= cAmoebaSim.numberOfAtoms) || ((y+j) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
// add to field at atomI the field due atomJ's dipole
fieldSum[0] += mask ? ijField[0][0] : 0.0f;
fieldSum[1] += mask ? ijField[0][1] : 0.0f;
fieldSum[2] += mask ? ijField[0][2] : 0.0f;
fieldPolarSum[0] += mask ? ijField[2][0] : 0.0f;
fieldPolarSum[1] += mask ? ijField[2][1] : 0.0f;
fieldPolarSum[2] += mask ? ijField[2][2] : 0.0f;
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+j) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+j) : atomI;
unsigned int pullBackIndex = 0;
unsigned int indexI = 0;
unsigned int indexJ = indexI ? 0 : 2;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + j);
debugArray[index].z = cAmoebaSim.cutoffDistance2;
debugArray[index].w = 6.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
pullBackIndex++;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
float flag = 6.0f;
debugArray[index].x = ijField[indexI][0];
debugArray[index].y = ijField[indexI][1];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0];
debugArray[index].y = ijField[indexJ][1];
debugArray[index].z = ijField[indexJ][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, fieldSum, outputField );
load3dArrayBufferPerWarp( offset, fieldPolarSum, outputFieldPolar);
#else
unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, fieldSum, outputField );
load3dArray( offset, fieldPolarSum, outputFieldPolar);
#endif
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
if (lasty != y)
{
unsigned int atomJ = y + tgx;
// load coordinates, charge, ...
loadMutualInducedShared( &(sA[threadIdx.x]), atomJ );
}
// zero shared fields
zeroMutualInducedParticleSharedField( &(sA[threadIdx.x]) );
for (unsigned int j = 0; j < GRID; j++)
{
float ijField[4][3];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel( localParticle, psA[tj], uscale, ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
);
unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+tj) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
// add to field at atomI the field due atomJ's dipole
fieldSum[0] += mask ? ijField[0][0] : 0.0f;
fieldSum[1] += mask ? ijField[0][1] : 0.0f;
fieldSum[2] += mask ? ijField[0][2] : 0.0f;
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum[0] += mask ? ijField[2][0] : 0.0f;
fieldPolarSum[1] += mask ? ijField[2][1] : 0.0f;
fieldPolarSum[2] += mask ? ijField[2][2] : 0.0f;
// add to field at atomJ the field due atomI's dipole
psA[tj].field[0] += mask ? ijField[1][0] : 0.0f;
psA[tj].field[1] += mask ? ijField[1][1] : 0.0f;
psA[tj].field[2] += mask ? ijField[1][2] : 0.0f;
// add to polar field at atomJ the field due atomI's dipole
psA[tj].fieldPolar[0] += mask ? ijField[3][0] : 0.0f;
psA[tj].fieldPolar[1] += mask ? ijField[3][1] : 0.0f;
psA[tj].fieldPolar[2] += mask ? ijField[3][2] : 0.0f;
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+tj) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+tj) : atomI;
unsigned int pullBackIndex = 0;
unsigned int indexI = 0;
unsigned int indexJ = indexI ? 0 : 2;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
debugArray[index].z = cAmoebaSim.cutoffDistance2;
debugArray[index].w = 7.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
pullBackIndex++;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
float flag = 7.0f;
debugArray[index].x = ijField[indexI][0];
debugArray[index].y = ijField[indexI][1];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0];
debugArray[index].y = ijField[indexJ][1];
debugArray[index].z = ijField[indexJ][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
tj = (tj + 1) & (GRID - 1);
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, fieldSum, outputField );
load3dArrayBufferPerWarp( offset, fieldPolarSum, outputFieldPolar);
offset = 3*(y + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, sA[threadIdx.x].field, outputField );
load3dArrayBufferPerWarp( offset, sA[threadIdx.x].fieldPolar, outputFieldPolar);
#else
unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, fieldSum, outputField );
load3dArray( offset, fieldPolarSum, outputFieldPolar);
offset = 3*(y + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, sA[threadIdx.x].field, outputField );
load3dArray( offset, sA[threadIdx.x].fieldPolar, outputFieldPolar);
#endif
lasty = y;
}
pos++;
}
}
...@@ -373,6 +373,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG ...@@ -373,6 +373,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaMutualInducedField( amoebaGpu ); cudaComputeAmoebaMutualInducedField( amoebaGpu );
} else { } else {
cudaComputeAmoebaPmeFixedEField( amoebaGpu ); cudaComputeAmoebaPmeFixedEField( amoebaGpu );
cudaComputeAmoebaPmeMutualInducedField( amoebaGpu );
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment