Commit cf335495 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

PME real space and self terms for fixed E-field

parent acb46362
......@@ -1420,6 +1420,18 @@ void gpuKirkwoodAllocate( amoebaGpuContext amoebaGpu )
}
static void tabulateErfc(gpuContext gpu)
{
int tableSize = 2048;
gpu->sim.tabulatedErfcSize = tableSize;
gpu->sim.tabulatedErfcScale = tableSize/(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff);
gpu->psTabulatedErfc = new CUDAStream<float>(tableSize, 1, "TabulatedErfc");
gpu->sim.pTabulatedErfc = gpu->psTabulatedErfc->_pDevData;
for (int i = 0; i < tableSize; ++i)
(*gpu->psTabulatedErfc)[i] = (float) erfc(i*(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff)/tableSize);
gpu->psTabulatedErfc->Upload();
}
/**---------------------------------------------------------------------------------------
Create/initialize data structs associated w/ molecular -> lab frame calculation
......@@ -1525,14 +1537,26 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
} else if( nonbondedMethod == 1 ){
amoebaGpu->multipoleNonbondedMethod = AMOEBA_PARTICLE_MESH_EWALD;
} else {
throw OpenMM::OpenMMException("multipoleNonbondedMethod not recognzied.\n" );
throw OpenMM::OpenMMException("multipoleNonbondedMethod not recognized.\n" );
}
amoebaGpu->amoebaSim.cutoffDistance2 = cutoffDistance*cutoffDistance;
amoebaGpu->amoebaSim.sqrtPi = sqrt( 3.1415926535897932384626433832795 );
amoebaGpu->amoebaSim.aewald = aewald;
amoebaGpu->amoebaSim.electric = electricConstant;
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s Nonbonded method=%d %d [NoCutoff=%d PME=%d]\n",
methodName.c_str(), nonbondedMethod, amoebaGpu->multipoleNonbondedMethod,
AMOEBA_NO_CUTOFF, AMOEBA_PARTICLE_MESH_EWALD );
(void) fflush( amoebaGpu->log );
}
amoebaGpu->amoebaSim.cutoffDistance2 = cutoffDistance*cutoffDistance;
amoebaGpu->amoebaSim.sqrtPi = sqrt( 3.1415926535897932384626433832795 );
amoebaGpu->amoebaSim.aewald = aewald;
amoebaGpu->amoebaSim.electric = electricConstant;
amoebaGpu->gpuContext->sim.alphaEwald = aewald;
amoebaGpu->gpuContext->sim.nonbondedCutoff = cutoffDistance;
tabulateErfc(amoebaGpu->gpuContext);
if( amoebaGpu->amoebaSim.dielec < 1.0e-05 ){
amoebaGpu->amoebaSim.dielec = 1.0f;
amoebaGpu->amoebaSim.dielec = 1.0f;
}
for( int ii = 0; ii < static_cast<int>(charges.size()); ii++ ){
......@@ -2593,6 +2617,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaVdw14_7Sim( amoebaGpu );
SetCalculateAmoebaCudaWcaDispersionSim( amoebaGpu );
SetCalculateAmoebaCudaMutualInducedFieldSim( amoebaGpu );
SetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpu );
SetCalculateAmoebaElectrostaticSim( amoebaGpu );
SetCalculateAmoebaRealSpaceEwaldSim( amoebaGpu );
SetCalculateAmoebaCudaMapTorquesSim( amoebaGpu );
......
......@@ -65,12 +65,17 @@ extern void SetCalculateAmoebaCudaWcaDispersionSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaWcaDispersionSim(amoebaGpuContext gpu);
extern void kCalculateAmoebaWcaDispersionForces(amoebaGpuContext amoebaGpu );
// fixed electric field
// fixed electric field -- no cutoff
extern void SetCalculateAmoebaCudaFixedEFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaFixedEFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaFixedEField( amoebaGpuContext gpu);
// fixed electric field -- PME
extern void SetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext gpu);
// fixed electric field and Gk
extern void SetCalculateAmoebaCudaFixedEAndGKFieldsSim(amoebaGpuContext gpu);
......
......@@ -83,6 +83,9 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
FixedFieldParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
FixedFieldParticle localParticle;
loadFixedFieldShared( &localParticle, atomI, bornRadii );
float4 iCoord = atomCoord[atomI];
float eFieldSum[3];
......@@ -106,9 +109,7 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
// load coordinates, charge, ...
loadFixedFieldShared( &(sA[threadIdx.x]), atomI,
atomCoord, labFrameDipole, labFrameQuadrupole,
cAmoebaSim.pDampingFactorAndThole, bornRadii );
loadFixedFieldShared( &(sA[threadIdx.x]), atomI, bornRadii );
if (!bExclusionFlag)
{
......@@ -125,12 +126,7 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[j].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[j].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -182,12 +178,7 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[j].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[j].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -366,9 +357,7 @@ if( atomI == targetAtom ){
{
// load coordinates, charge, ...
loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx),
atomCoord, labFrameDipole, labFrameQuadrupole,
cAmoebaSim.pDampingFactorAndThole, bornRadii );
loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx), bornRadii );
}
......@@ -387,12 +376,7 @@ if( atomI == targetAtom ){
loadFixedFieldParticleData( &(psA[tj]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[tj].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[tj].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -563,12 +547,7 @@ if( (atomI == targetAtom || (y + tj) == targetAtom) ){
loadFixedFieldParticleData( &(psA[tj]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[tj].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[tj].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......
......@@ -61,46 +61,6 @@ static void kReduceE_Fields_kernel(amoebaGpuContext amoebaGpu )
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaFixedEField.h"
#ifdef AMOEBA_DEBUG
#if 0
static void printEFieldBuffer( amoebaGpuContext amoebaGpu, unsigned int bufferIndex )
{
(void) fprintf( amoebaGpu->log, "EField Buffer %u\n", bufferIndex );
unsigned int start = bufferIndex*3*amoebaGpu->paddedNumberOfAtoms;
unsigned int stop = (bufferIndex+1)*3*amoebaGpu->paddedNumberOfAtoms;
for( unsigned int ii = start; ii < stop; ii += 3 ){
unsigned int ii3Index = ii/3;
unsigned int bufferIndex = ii3Index/(amoebaGpu->paddedNumberOfAtoms);
unsigned int particleIndex = ii3Index - bufferIndex*(amoebaGpu->paddedNumberOfAtoms);
(void) fprintf( amoebaGpu->log, " %6u %3u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii/3, bufferIndex, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+2] );
}
}
static void printEFieldAtomBuffers( amoebaGpuContext amoebaGpu, unsigned int targetAtom )
{
(void) fprintf( amoebaGpu->log, "EField atom %u\n", targetAtom );
for( unsigned int ii = 0; ii < amoebaGpu->outputBuffers; ii++ ){
unsigned int particleIndex = targetAtom + ii*3*amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log, " %2u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+2] );
}
}
#endif
#endif
/**---------------------------------------------------------------------------------------
Compute fixed electric field
......@@ -145,9 +105,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
(void) fprintf( amoebaGpu->log, "N2 warp\n" );
kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psLabFrameDipole->_pDevStream[0],
amoebaGpu->psLabFrameQuadrupole->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0],
......@@ -167,9 +124,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaFixedE_FieldN2Forces_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psLabFrameDipole->_pDevStream[0],
amoebaGpu->psLabFrameQuadrupole->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0],
......
......@@ -36,9 +36,6 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
unsigned int* workUnit,
float4* atomCoord,
float* labFrameDipole,
float* labFrameQuadrupole,
float* outputEField,
float* outputEFieldPolar
#ifdef AMOEBA_DEBUG
......@@ -59,10 +56,6 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
float4 jCoord;
float jDipole[3];
float jQuadrupole[9];
while (pos < end)
{
......@@ -80,7 +73,9 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
FixedFieldParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
float4 iCoord = atomCoord[atomI];
FixedFieldParticle localParticle;
loadFixedFieldShared( &localParticle, atomI );
float fieldSum[3];
float fieldPolarSum[3];
......@@ -98,9 +93,7 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
// load coordinates, charge, ...
loadFixedFieldShared( &(sA[threadIdx.x]), atomI,
atomCoord, labFrameDipole, labFrameQuadrupole,
cAmoebaSim.pDampingFactorAndThole );
loadFixedFieldShared( &(sA[threadIdx.x]), atomI );
if (!bExclusionFlag)
{
......@@ -113,16 +106,7 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
float ijField[2][3];
// load coords, charge, ...
loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[j].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[j].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -156,14 +140,9 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
float ijField[2][3];
loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole );
//loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[j].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[j].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -280,9 +259,7 @@ if( 0 && atomI == targetAtom ){
// load coordinates, charge, ...
loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx),
atomCoord, labFrameDipole, labFrameQuadrupole,
cAmoebaSim.pDampingFactorAndThole );
loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx) );
}
......@@ -297,16 +274,7 @@ if( 0 && atomI == targetAtom ){
float ijField[2][3];
// load coords, charge, ...
loadFixedFieldParticleData( &(psA[tj]), &jCoord, jDipole, jQuadrupole );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[tj].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[tj].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -420,14 +388,7 @@ if( 0 && (atomI == targetAtom || (y + tj) == targetAtom) ){
float ijField[2][3];
loadFixedFieldParticleData( &(psA[tj]), &jCoord, jDipole, jQuadrupole );
calculateFixedEFieldPairIxn_kernel( iCoord, jCoord,
cAmoebaSim.pDampingFactorAndThole[atomI].x, psA[tj].damp,
cAmoebaSim.pDampingFactorAndThole[atomI].y, psA[tj].thole,
&(labFrameDipole[atomI*3]), jDipole,
&(labFrameQuadrupole[atomI*9]), jQuadrupole,
cAmoebaSim.scalingDistanceCutoff, ijField
calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......
......@@ -46,9 +46,7 @@ struct FixedFieldParticle {
#endif
};
__device__ void loadFixedFieldShared( struct FixedFieldParticle* sA, unsigned int atomI,
float4* atomCoord, float* labDipole, float* labQuadrupole,
float2* dampingFactorAndThole
__device__ static void loadFixedFieldShared( struct FixedFieldParticle* sA, unsigned int atomI
#ifdef GK
, float* bornR
#endif
......@@ -56,28 +54,28 @@ __device__ void loadFixedFieldShared( struct FixedFieldParticle* sA, unsigned in
{
// coordinates & charge
sA->x = atomCoord[atomI].x;
sA->y = atomCoord[atomI].y;
sA->z = atomCoord[atomI].z;
sA->q = atomCoord[atomI].w;
sA->x = cSim.pPosq[atomI].x;
sA->y = cSim.pPosq[atomI].y;
sA->z = cSim.pPosq[atomI].z;
sA->q = cSim.pPosq[atomI].w;
// lab dipole
sA->labFrameDipole_X = labDipole[atomI*3];
sA->labFrameDipole_Y = labDipole[atomI*3+1];
sA->labFrameDipole_Z = labDipole[atomI*3+2];
sA->labFrameDipole_X = cAmoebaSim.pLabFrameDipole[atomI*3];
sA->labFrameDipole_Y = cAmoebaSim.pLabFrameDipole[atomI*3+1];
sA->labFrameDipole_Z = cAmoebaSim.pLabFrameDipole[atomI*3+2];
// lab quadrupole
sA->labFrameQuadrupole_XX = labQuadrupole[atomI*9];
sA->labFrameQuadrupole_XY = labQuadrupole[atomI*9+1];
sA->labFrameQuadrupole_XZ = labQuadrupole[atomI*9+2];
sA->labFrameQuadrupole_YY = labQuadrupole[atomI*9+4];
sA->labFrameQuadrupole_YZ = labQuadrupole[atomI*9+5];
sA->labFrameQuadrupole_ZZ = labQuadrupole[atomI*9+8];
sA->labFrameQuadrupole_XX = cAmoebaSim.pLabFrameQuadrupole[atomI*9];
sA->labFrameQuadrupole_XY = cAmoebaSim.pLabFrameQuadrupole[atomI*9+1];
sA->labFrameQuadrupole_XZ = cAmoebaSim.pLabFrameQuadrupole[atomI*9+2];
sA->labFrameQuadrupole_YY = cAmoebaSim.pLabFrameQuadrupole[atomI*9+4];
sA->labFrameQuadrupole_YZ = cAmoebaSim.pLabFrameQuadrupole[atomI*9+5];
sA->labFrameQuadrupole_ZZ = cAmoebaSim.pLabFrameQuadrupole[atomI*9+8];
sA->damp = dampingFactorAndThole[atomI].x;
sA->thole = dampingFactorAndThole[atomI].y;
sA->damp = cAmoebaSim.pDampingFactorAndThole[atomI].x;
sA->thole = cAmoebaSim.pDampingFactorAndThole[atomI].y;
#ifdef GK
sA->bornR = bornR[atomI];
#endif
......@@ -86,8 +84,8 @@ __device__ void loadFixedFieldShared( struct FixedFieldParticle* sA, unsigned in
// load struct and arrays w/ shared data in sA
__device__ void loadFixedFieldParticleData( struct FixedFieldParticle* sA,
float4* jCoord, float* jDipole, float* jQuadrupole
__device__ static void loadFixedFieldParticleData( struct FixedFieldParticle* sA,
float4* jCoord, float* jDipole, float* jQuadrupole
#ifdef GK
, float* bornR
#endif
......@@ -142,15 +140,11 @@ __device__ static void zeroFixedFieldParticleSharedField( struct FixedFieldParti
#endif
}
// body of fixed E-field calculation
__device__ static void calculateFixedEFieldPairIxn_kernel( float4 atomCoordinatesI, float4 atomCoordinatesJ,
float dampingFactorI, float dampingFactorJ,
float tholeI, float tholeJ,
float* labDipoleI, float* labDipoleJ,
float* labQuadrupoleI, float* labQuadrupoleJ,
float scalingDistanceCutoff,
float field[2][3]
__device__ static void calculateFixedEFieldPairIxn_kernel( FixedFieldParticle& atomI, FixedFieldParticle& atomJ,
float field[2][3]
#ifdef AMOEBA_DEBUG
, float4 debugArray[12]
#endif
......@@ -163,9 +157,9 @@ __device__ static void calculateFixedEFieldPairIxn_kernel( float4 atomCoordinate
// get deltaR and r between 2 atoms
float deltaR[3];
deltaR[0] = atomCoordinatesJ.x - atomCoordinatesI.x;
deltaR[1] = atomCoordinatesJ.y - atomCoordinatesI.y;
deltaR[2] = atomCoordinatesJ.z - atomCoordinatesI.z;
deltaR[0] = atomJ.x - atomI.x;
deltaR[1] = atomJ.y - atomI.y;
deltaR[2] = atomJ.z - atomI.z;
float r = SQRT( deltaR[0]*deltaR[0] + deltaR[1]*deltaR[1] + deltaR[2]*deltaR[2] );
float rI = 1.0f/r;
......@@ -177,14 +171,14 @@ __device__ static void calculateFixedEFieldPairIxn_kernel( float4 atomCoordinate
// get scaling factors, if needed
float damp = dampingFactorI*dampingFactorJ;
float damp = atomI.damp*atomJ.damp;
float dampExp;
if( damp != 0.0f && r < scalingDistanceCutoff ){
if( damp != 0.0f && r < cAmoebaSim.scalingDistanceCutoff ){
// get scaling factors
float ratio = r/damp;
float pGamma = tholeJ > tholeI ? tholeI : tholeJ;
float pGamma = atomJ.thole > atomI.thole ? atomI.thole : atomJ.thole;
damp = ratio*ratio*ratio*pGamma;
dampExp = EXP( -damp );
} else {
......@@ -197,69 +191,30 @@ __device__ static void calculateFixedEFieldPairIxn_kernel( float4 atomCoordinate
float rr5_2 = rr5*2.0f;
#ifdef AMOEBA_DEBUG
int index = 0;
// 0-2
debugArray[index].x = r;
debugArray[index].y = rr3;
debugArray[index].z = rr5;
index++;
#endif
float* dipole = labDipoleJ;
float* quadrupole = labQuadrupoleJ;
float qDotDelta[3];
qDotDelta[0] = deltaR[0]*quadrupole[0] + deltaR[1]*quadrupole[1] + deltaR[2]*quadrupole[2];
qDotDelta[1] = deltaR[0]*quadrupole[3] + deltaR[1]*quadrupole[4] + deltaR[2]*quadrupole[5];
qDotDelta[2] = deltaR[0]*quadrupole[6] + deltaR[1]*quadrupole[7] + deltaR[2]*quadrupole[8];
float dotdd = deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2];
float dotqd = deltaR[0]*qDotDelta[0] + deltaR[1]*qDotDelta[1] + deltaR[2]*qDotDelta[2];
float factor = -rr3*atomCoordinatesJ.w + rr5*dotdd - rr7*dotqd;
qDotDelta[0] = deltaR[0]*atomJ.labFrameQuadrupole_XX + deltaR[1]*atomJ.labFrameQuadrupole_XY + deltaR[2]*atomJ.labFrameQuadrupole_XZ;
qDotDelta[1] = deltaR[0]*atomJ.labFrameQuadrupole_XY + deltaR[1]*atomJ.labFrameQuadrupole_YY + deltaR[2]*atomJ.labFrameQuadrupole_YZ;
qDotDelta[2] = deltaR[0]*atomJ.labFrameQuadrupole_XZ + deltaR[1]*atomJ.labFrameQuadrupole_YZ + deltaR[2]*atomJ.labFrameQuadrupole_ZZ;
#ifdef AMOEBA_DEBUG
// 3-5
debugArray[index].x = dotdd;
debugArray[index].y = dotqd;
debugArray[index].z = factor;
index++;
#endif
float dotdd = deltaR[0]*atomJ.labFrameDipole_X + deltaR[1]*atomJ.labFrameDipole_Y + deltaR[2]*atomJ.labFrameDipole_Z;
float dotqd = deltaR[0]*qDotDelta[0] + deltaR[1]*qDotDelta[1] + deltaR[2]*qDotDelta[2];
field[0][0] = deltaR[0]*factor - rr3*dipole[0] + rr5_2*qDotDelta[0];
field[0][1] = deltaR[1]*factor - rr3*dipole[1] + rr5_2*qDotDelta[1];
field[0][2] = deltaR[2]*factor - rr3*dipole[2] + rr5_2*qDotDelta[2];
float factor = -rr3*atomJ.q + rr5*dotdd - rr7*dotqd;
field[0][0] = deltaR[0]*factor - rr3*atomJ.labFrameDipole_X + rr5_2*qDotDelta[0];
field[0][1] = deltaR[1]*factor - rr3*atomJ.labFrameDipole_Y + rr5_2*qDotDelta[1];
field[0][2] = deltaR[2]*factor - rr3*atomJ.labFrameDipole_Z + rr5_2*qDotDelta[2];
dipole = labDipoleI;
quadrupole = labQuadrupoleI;
qDotDelta[0] = deltaR[0]*quadrupole[0] + deltaR[1]*quadrupole[1] + deltaR[2]*quadrupole[2];
qDotDelta[1] = deltaR[0]*quadrupole[3] + deltaR[1]*quadrupole[4] + deltaR[2]*quadrupole[5];
qDotDelta[2] = deltaR[0]*quadrupole[6] + deltaR[1]*quadrupole[7] + deltaR[2]*quadrupole[8];
qDotDelta[0] = deltaR[0]*atomI.labFrameQuadrupole_XX + deltaR[1]*atomI.labFrameQuadrupole_XY + deltaR[2]*atomI.labFrameQuadrupole_XZ;
qDotDelta[1] = deltaR[0]*atomI.labFrameQuadrupole_XY + deltaR[1]*atomI.labFrameQuadrupole_YY + deltaR[2]*atomI.labFrameQuadrupole_YZ;
qDotDelta[2] = deltaR[0]*atomI.labFrameQuadrupole_XZ + deltaR[1]*atomI.labFrameQuadrupole_YZ + deltaR[2]*atomI.labFrameQuadrupole_ZZ;
dotdd = deltaR[0]*dipole[0] + deltaR[1]*dipole[1] + deltaR[2]*dipole[2];
dotdd = deltaR[0]*atomI.labFrameDipole_X + deltaR[1]*atomI.labFrameDipole_Y + deltaR[2]*atomI.labFrameDipole_Z;
dotqd = deltaR[0]*qDotDelta[0] + deltaR[1]*qDotDelta[1] + deltaR[2]*qDotDelta[2];
factor = rr3*atomCoordinatesI.w + rr5*dotdd + rr7*dotqd;
factor = rr3*atomI.q + rr5*dotdd + rr7*dotqd;
#ifdef AMOEBA_DEBUG
// 6-8
debugArray[index].x = dotdd;
debugArray[index].y = dotqd;
debugArray[index].z = factor;
index++;
#endif
field[1][0] = deltaR[0]*factor - rr3*atomI.labFrameDipole_X - rr5_2*qDotDelta[0];
field[1][1] = deltaR[1]*factor - rr3*atomI.labFrameDipole_Y - rr5_2*qDotDelta[1];
field[1][2] = deltaR[2]*factor - rr3*atomI.labFrameDipole_Z - rr5_2*qDotDelta[2];
field[1][0] = deltaR[0]*factor - rr3*dipole[0] - rr5_2*qDotDelta[0];
field[1][1] = deltaR[1]*factor - rr3*dipole[1] - rr5_2*qDotDelta[1];
field[1][2] = deltaR[2]*factor - rr3*dipole[2] - rr5_2*qDotDelta[2];
#if 0
float testValue = 1.0f;
field[0][0] = testValue;
field[0][1] = testValue;
field[0][2] = testValue;
field[1][0] = testValue;
field[1][1] = testValue;
field[1][2] = testValue;
#endif
}
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
void SetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
void GetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn, float* fieldOut )
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
// Reduce field
const float term = (4.0f/3.0f)*(cAmoebaSim.aewald*cAmoebaSim.aewald*cAmoebaSim.aewald)/cAmoebaSim.sqrtPi;
while (pos < fieldComponents)
{
// self-term included here
float totalField = term*cAmoebaSim.pLabFrameDipole[pos];
float* pFt = fieldIn + pos;
unsigned int i = outputBuffers;
while (i >= 4)
{
totalField += pFt[0] + pFt[fieldComponents] + pFt[2*fieldComponents] + pFt[3*fieldComponents];
pFt += fieldComponents*4;
i -= 4;
}
if (i >= 2)
{
totalField += pFt[0] + pFt[fieldComponents];
pFt += fieldComponents*2;
i -= 2;
}
if (i > 0)
{
totalField += pFt[0];
}
fieldOut[pos] = totalField;
pos += gridDim.x * blockDim.x;
}
}
// reduce psWorkArray_3_1 -> EField
// reduce psWorkArray_3_2 -> EFieldPolar
static void kReducePmeDirectE_Fields(amoebaGpuContext amoebaGpu )
{
kReduceDirectSelfFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>(
amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psE_Field->_pDevStream[0] );
LAUNCHERROR("kReducePmeE_Fields1");
kReduceDirectSelfFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>(
amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers,
amoebaGpu->psWorkArray_3_2->_pDevStream[0], amoebaGpu->psE_FieldPolar->_pDevStream[0] );
LAUNCHERROR("kReducePmeE_Fields2");
}
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
#undef GK
#include "kCalculateAmoebaCudaFixedFieldParticle.h"
__device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& atomI, FixedFieldParticle& atomJ,
float dscale, float pscale, float fields[4][3]
#ifdef AMOEBA_DEBUG
, float4* pullBack
#endif
){
// compute the real space portion of the Ewald summation
float xr = atomJ.x - atomI.x;
float yr = atomJ.y - atomI.y;
float zr = atomJ.z - atomI.z;
// periodic boundary conditions
xr -= floor(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
yr -= floor(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
zr -= floor(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
float r2 = xr*xr + yr* yr + zr*zr;
float r = sqrtf(r2);
// calculate the error function damping terms
float ralpha = cAmoebaSim.aewald*r;
float bn[4];
bn[0] = erfc(ralpha)/r;
float alsq2 = 2.0f*cAmoebaSim.aewald*cAmoebaSim.aewald;
float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cAmoebaSim.aewald);
float exp2a = exp(-(ralpha*ralpha));
alsq2n *= alsq2;
bn[1] = (bn[0]+alsq2n*exp2a)/r2;
alsq2n *= alsq2;
bn[2] = (3.0f*bn[1]+alsq2n*exp2a)/r2;
alsq2n *= alsq2;
bn[3] = (5.0f*bn[2]+alsq2n*exp2a)/r2;
// compute the error function scaled and unscaled terms
float scale3 = 1.0f;
float scale5 = 1.0f;
float scale7 = 1.0f;
float damp = atomI.damp*atomJ.damp;
if( damp != 0.0f ){
float ratio = (r/damp);
ratio = ratio*ratio*ratio;
float pgamma = atomI.thole < atomJ.thole ? atomI.thole : atomJ.thole;
damp = -pgamma*ratio;
if( damp > -50.0f) {
float expdamp = exp(damp);
scale3 = 1.0f - expdamp;
scale5 = 1.0f - expdamp*(1.0f-damp);
scale7 = 1.0f - expdamp*(1.0f-damp+(0.6f*damp*damp));
}
}
float dsc3 = dscale*scale3;
float dsc5 = dscale*scale5;
float dsc7 = dscale*scale7;
float psc3 = pscale*scale3;
float psc5 = pscale*scale5;
float psc7 = pscale*scale7;
float r3 = (r*r2);
float r5 = (r3*r2);
float r7 = (r5*r2);
float drr3 = (1.0f-dsc3)/r3;
float drr5 = 3.0f * (1.0f-dsc5)/r5;
float drr7 = 15.0f * (1.0f-dsc7)/r7;
float prr3 = (1.0f-psc3) / r3;
float prr5 = 3.0f *(1.0f-psc5)/r5;
float prr7 = 15.0f*(1.0f-psc7)/r7;
float dir = atomI.labFrameDipole_X*xr + atomI.labFrameDipole_Y*yr + atomI.labFrameDipole_Z*zr;
float qix = atomI.labFrameQuadrupole_XX*xr + atomI.labFrameQuadrupole_XY*yr + atomI.labFrameQuadrupole_XZ*zr;
float qiy = atomI.labFrameQuadrupole_XY*xr + atomI.labFrameQuadrupole_YY*yr + atomI.labFrameQuadrupole_YZ*zr;
float qiz = atomI.labFrameQuadrupole_XZ*xr + atomI.labFrameQuadrupole_YZ*yr + atomI.labFrameQuadrupole_ZZ*zr;
float qir = qix*xr + qiy*yr + qiz*zr;
float dkr = atomJ.labFrameDipole_X*xr + atomJ.labFrameDipole_Y*yr + atomJ.labFrameDipole_Z*zr;
float qkx = atomJ.labFrameQuadrupole_XX*xr + atomJ.labFrameQuadrupole_XY*yr + atomJ.labFrameQuadrupole_XZ*zr;
float qky = atomJ.labFrameQuadrupole_XY*xr + atomJ.labFrameQuadrupole_YY*yr + atomJ.labFrameQuadrupole_YZ*zr;
float qkz = atomJ.labFrameQuadrupole_XZ*xr + atomJ.labFrameQuadrupole_YZ*yr + atomJ.labFrameQuadrupole_ZZ*zr;
float qkr = qkx*xr + qky*yr + qkz*zr;
float fim[3],fkm[3];
float fid[3],fkd[3];
float fip[3],fkp[3];
fim[0] = -xr*(bn[1]*atomJ.q-bn[2]*dkr+bn[3]*qkr)
- bn[1]*atomJ.labFrameDipole_X + 2.0f*bn[2]*qkx;
fim[1] = -yr*(bn[1]*atomJ.q-bn[2]*dkr+bn[3]*qkr)
- bn[1]*atomJ.labFrameDipole_Y + 2.0f*bn[2]*qky;
fim[2] = -zr*(bn[1]*atomJ.q-bn[2]*dkr+bn[3]*qkr)
- bn[1]*atomJ.labFrameDipole_Z + 2.0f*bn[2]*qkz;
fkm[0] = xr*(bn[1]*atomI.q+bn[2]*dir+bn[3]*qir)
- bn[1]*atomI.labFrameDipole_X - 2.0f*bn[2]*qix;
fkm[1] = yr*(bn[1]*atomI.q+bn[2]*dir+bn[3]*qir)
- bn[1]*atomI.labFrameDipole_Y - 2.0f*bn[2]*qiy;
fkm[2] = zr*(bn[1]*atomI.q+bn[2]*dir+bn[3]*qir)
- bn[1]*atomI.labFrameDipole_Z - 2.0f*bn[2]*qiz;
fid[0] = -xr*(drr3*atomJ.q-drr5*dkr+drr7*qkr)
- drr3*atomJ.labFrameDipole_X + 2.0f*drr5*qkx;
fid[1] = -yr*(drr3*atomJ.q-drr5*dkr+drr7*qkr)
- drr3*atomJ.labFrameDipole_Y + 2.0f*drr5*qky;
fid[2] = -zr*(drr3*atomJ.q-drr5*dkr+drr7*qkr)
- drr3*atomJ.labFrameDipole_Z + 2.0f*drr5*qkz;
fkd[0] = xr*(drr3*atomI.q+drr5*dir+drr7*qir)
- drr3*atomI.labFrameDipole_X - 2.0f*drr5*qix;
fkd[1] = yr*(drr3*atomI.q+drr5*dir+drr7*qir)
- drr3*atomI.labFrameDipole_Y - 2.0f*drr5*qiy;
fkd[2] = zr*(drr3*atomI.q+drr5*dir+drr7*qir)
- drr3*atomI.labFrameDipole_Z - 2.0f*drr5*qiz;
fip[0] = -xr*(prr3*atomJ.q-prr5*dkr+prr7*qkr)
- prr3*atomJ.labFrameDipole_X + 2.0f*prr5*qkx;
fip[1] = -yr*(prr3*atomJ.q-prr5*dkr+prr7*qkr)
- prr3*atomJ.labFrameDipole_Y + 2.0f*prr5*qky;
fip[2] = -zr*(prr3*atomJ.q-prr5*dkr+prr7*qkr)
- prr3*atomJ.labFrameDipole_Z + 2.0f*prr5*qkz;
fkp[0] = xr*(prr3*atomI.q+prr5*dir+prr7*qir)
- prr3*atomI.labFrameDipole_X - 2.0f*prr5*qix;
fkp[1] = yr*(prr3*atomI.q+prr5*dir+prr7*qir)
- prr3*atomI.labFrameDipole_Y - 2.0f*prr5*qiy;
fkp[2] = zr*(prr3*atomI.q+prr5*dir+prr7*qir)
- prr3*atomI.labFrameDipole_Z - 2.0f*prr5*qiz;
// increment the field at each site due to this interaction
if( r2 <= cAmoebaSim.cutoffDistance2 ){
fields[0][0] = fim[0] - fid[0];
fields[1][0] = fkm[0] - fkd[0];
fields[2][0] = fim[0] - fip[0];
fields[3][0] = fkm[0] - fkp[0];
fields[0][1] = fim[1] - fid[1];
fields[1][1] = fkm[1] - fkd[1];
fields[2][1] = fim[1] - fip[1];
fields[3][1] = fkm[1] - fkp[1];
fields[0][2] = fim[2] - fid[2];
fields[1][2] = fkm[2] - fkd[2];
fields[2][2] = fim[2] - fip[2];
fields[3][2] = fkm[2] - fkp[2];
} else {
fields[0][0] = 0.0f;
fields[1][0] = 0.0f;
fields[2][0] = 0.0f;
fields[3][0] = 0.0f;
fields[0][1] = 0.0f;
fields[1][1] = 0.0f;
fields[2][1] = 0.0f;
fields[3][1] = 0.0f;
fields[0][2] = 0.0f;
fields[1][2] = 0.0f;
fields[2][2] = 0.0f;
fields[3][2] = 0.0f;
}
#ifdef AMOEBA_DEBUG
pullBack[0].x = xr;
pullBack[0].y = yr;
pullBack[0].z = zr;
pullBack[0].w = r2;
pullBack[1].x = atomJ.x - atomI.x;
pullBack[1].y = atomJ.y - atomI.y;
pullBack[1].z = atomJ.z - atomI.z;
pullBack[1].w = (atomJ.x - atomI.x)*(atomJ.x - atomI.x) + (atomJ.y - atomI.y)*(atomJ.y - atomI.y)+ (atomJ.z - atomI.z)*(atomJ.z - atomI.z);
/*
pullBack[1].x = scale3;
pullBack[1].y = scale5;
pullBack[1].z = scale7;
*/
#endif
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
/**---------------------------------------------------------------------------------------
Compute fixed electric field using PME
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
gpuContext gpu = amoebaGpu->gpuContext;
#ifdef AMOEBA_DEBUG
static const char* methodName = "computeCudaAmoebaPmeFixedEField";
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "\n%s\n", methodName ); (void) fflush( amoebaGpu->log );
}
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
// N2 debug array
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload();
(*gpu->psInteractionCount)[0] = gpu->sim.workUnits;
gpu->psInteractionCount->Upload();
// print intermediate results for the targetAtom
unsigned int targetAtom = 0;
#endif
kClearFields_3( amoebaGpu, 2 );
if (gpu->bOutputBufferPerWarp){
(void) fprintf( amoebaGpu->log, "N2 warp\n" );
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0],
debugArray->_pDevStream[0], targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif
} else {
#ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "N2 no warp\n" );
(void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
#endif
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0],
debugArray->_pDevStream[0], targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif
}
LAUNCHERROR("kCalculateAmoebaPmeDirectFixedE_Field_kernel");
#if 0
for( unsigned int ii = 0; ii < amoebaGpu->outputBuffers; ii++ ){
//float index = 1.0f;
float index = (float) ii;
for( unsigned int jj = 0; jj < 3*amoebaGpu->paddedNumberOfAtoms; jj += 3 ){
unsigned int kk = 3*ii*amoebaGpu->paddedNumberOfAtoms + jj;
amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk] = index;
amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk+1] = index;
amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk+2] = index;
}
}
amoebaGpu->psWorkArray_3_1->Upload();
#endif
kReducePmeDirectE_Fields( amoebaGpu );
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
gpu->psInteractionCount->Download();
(void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
amoebaGpu->psWorkArray_3_1->Download();
amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psE_Field->Download();
amoebaGpu->psE_FieldPolar->Download();
(void) fprintf( amoebaGpu->log, "OutEFields\n" );
int maxPrint = 32;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
int indexOffset = ii*3;
// E_Field
(void) fprintf( amoebaGpu->log,"E[%16.9e %16.9e %16.9e] ",
amoebaGpu->psE_Field->_pSysStream[0][indexOffset],
amoebaGpu->psE_Field->_pSysStream[0][indexOffset+1],
amoebaGpu->psE_Field->_pSysStream[0][indexOffset+2] );
// E_Field polar
(void) fprintf( amoebaGpu->log,"Epol[%16.9e %16.9e %16.9e] ",
amoebaGpu->psE_FieldPolar->_pSysStream[0][indexOffset],
amoebaGpu->psE_FieldPolar->_pSysStream[0][indexOffset+1],
amoebaGpu->psE_FieldPolar->_pSysStream[0][indexOffset+2] );
(void) fprintf( amoebaGpu->log,"\n" );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
ii = gpu->natoms - maxPrint;
}
}
(void) fflush( amoebaGpu->log );
(void) fprintf( amoebaGpu->log, "EFields End\n" );
(void) fprintf( amoebaGpu->log, "DebugQ\n" );
debugArray->Download();
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
(void) fprintf( amoebaGpu->log,"%5d PmeFixedEField\n", jj );
for( int kk = 0; kk < 7; kk++ ){
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
debugIndex += paddedNumberOfAtoms;
}
(void) fprintf( amoebaGpu->log,"\n" );
}
// write results to file
if( 1 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector);
cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
}
delete debugArray;
}
#endif
if( 1 ){
std::vector<int> fileId;
fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector);
cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
unsigned int* workUnit,
float* outputEField,
float* outputEFieldPolar
#ifdef AMOEBA_DEBUG
, float4* debugArray, unsigned int targetAtom
#endif
){
#ifdef AMOEBA_DEBUG
float4 pullBack[12];
float dScaleVal;
float pScaleVal;
#endif
extern __shared__ FixedFieldParticle sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
while (pos < end)
{
unsigned int x;
unsigned int y;
bool bExclusionFlag;
// extract cell coordinates
decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
FixedFieldParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
FixedFieldParticle localParticle;
loadFixedFieldShared( &localParticle, atomI );
float fieldSum[3];
float fieldPolarSum[3];
fieldSum[0] = 0.0f;
fieldSum[1] = 0.0f;
fieldSum[2] = 0.0f;
fieldPolarSum[0] = 0.0f;
fieldPolarSum[1] = 0.0f;
fieldPolarSum[2] = 0.0f;
if (x == y)
{
// load coordinates, charge, ...
loadFixedFieldShared( &(sA[threadIdx.x]), atomI );
if (!bExclusionFlag)
{
// this branch is never exercised since it includes the
// interaction between atomI and itself which is always excluded
for (unsigned int j = 0; j < GRID; j++)
{
float ijField[4][3];
// load coords, charge, ...
#ifdef AMOEBA_DEBUG
dScaleVal = 1.0f;
pScaleVal = 1.0f;
#endif
calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[j], 1.0f, 1.0f, ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
);
unsigned int match = (atomI == (y + j)) ? 1 : 0;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum[0] += match ? 0.0f : ijField[0][0];
fieldSum[1] += match ? 0.0f : ijField[0][1];
fieldSum[2] += match ? 0.0f : ijField[0][2];
fieldPolarSum[0] += match ? 0.0f : ijField[2][0];
fieldPolarSum[1] += match ? 0.0f : ijField[2][1];
fieldPolarSum[2] += match ? 0.0f : ijField[2][2];
}
}
else // bExclusion
{
unsigned int xi = x >> GRIDBITS;
unsigned int cell = xi + xi*cAmoebaSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
int dScaleMask = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
int2 pScaleMask = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
for (unsigned int j = 0; j < GRID; j++)
{
// load coords, charge, ...
float ijField[4][3];
float dScaleValue;
float pScaleValue;
getMaskedDScaleFactor( j, dScaleMask, &dScaleValue );
getMaskedPScaleFactor( j, pScaleMask, &pScaleValue );
#ifdef AMOEBA_DEBUG
dScaleVal = dScaleValue;
pScaleVal = pScaleValue;
#endif
calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[j], dScaleValue, pScaleValue, ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
);
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
unsigned int match = (atomI == (y + j)) ? 1 : 0;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum[0] += match ? 0.0f : ijField[0][0];
fieldSum[1] += match ? 0.0f : ijField[0][1];
fieldSum[2] += match ? 0.0f : ijField[0][2];
fieldPolarSum[0] += match ? 0.0f : ijField[2][0];
fieldPolarSum[1] += match ? 0.0f : ijField[2][1];
fieldPolarSum[2] += match ? 0.0f : ijField[2][2];
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom ){
unsigned int index = atomI == targetAtom ? (y + j) : atomI;
unsigned int pullBackIndex = 0;
unsigned int indexI = 0;
unsigned int indexJ = indexI ? 0 : 2;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + j);
debugArray[index].z = dScaleValue;
debugArray[index].w = pScaleValue;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
pullBackIndex++;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
float flag = 7.0f;
debugArray[index].x = ijField[indexI][0];
debugArray[index].y = ijField[indexI][1];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0];
debugArray[index].y = ijField[indexJ][1];
debugArray[index].z = ijField[indexJ][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, fieldSum, outputEField );
load3dArrayBufferPerWarp( offset, fieldPolarSum, outputEFieldPolar );
#else
unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, fieldSum, outputEField );
load3dArray( offset, fieldPolarSum, outputEFieldPolar );
#endif
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
if (lasty != y)
{
// load coordinates, charge, ...
loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx) );
}
// zero shared fields
zeroFixedFieldParticleSharedField( &(sA[threadIdx.x]) );
if (!bExclusionFlag)
{
for (unsigned int j = 0; j < GRID; j++)
{
float ijField[4][3];
// load coords, charge, ...
#ifdef AMOEBA_DEBUG
dScaleVal = 1.0f;
pScaleVal = 1.0f;
#endif
calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[tj], 1.0f, 1.0f, ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
);
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum[0] += ijField[0][0];
fieldSum[1] += ijField[0][1];
fieldSum[2] += ijField[0][2];
fieldPolarSum[0] += ijField[2][0];
fieldPolarSum[1] += ijField[2][1];
fieldPolarSum[2] += ijField[2][2];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA[tj].eField[0] += ijField[1][0];
psA[tj].eField[1] += ijField[1][1];
psA[tj].eField[2] += ijField[1][2];
psA[tj].eFieldP[0] += ijField[3][0];
psA[tj].eFieldP[1] += ijField[3][1];
psA[tj].eFieldP[2] += ijField[3][2];
#ifdef AMOEBA_DEBUG
if( (atomI == targetAtom || (y + tj) == targetAtom) ){
unsigned int index = (atomI == targetAtom) ? (y + tj) : atomI;
unsigned int indexI = (atomI == targetAtom) ? 0 : 2;
unsigned int indexJ = (atomI == targetAtom) ? 2 : 0;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
debugArray[index].z = dScaleVal;
debugArray[index].w = pScaleVal;
unsigned int pullBackIndex = 0;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;;
pullBackIndex++;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;;
float flag = 8.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI][0];
debugArray[index].y = ijField[indexI][1];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0];
debugArray[index].y = ijField[indexJ][1];
debugArray[index].z = ijField[indexJ][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
#if 0
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = pScaleValue + 10.0f;
#endif
}
#endif
tj = (tj + 1) & (GRID - 1);
}
}
else // bExclusion
{
// Read fixed atom data into registers and GRF
unsigned int xi = x >> GRIDBITS;
unsigned int yi = y >> GRIDBITS;
unsigned int cell = xi+yi*cAmoebaSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
int dScaleMask = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
int2 pScaleMask = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
for (unsigned int j = 0; j < GRID; j++)
{
// load coords, charge, ...
float ijField[4][3];
float dScaleValue;
float pScaleValue;
getMaskedDScaleFactor( tj, dScaleMask, &dScaleValue );
getMaskedPScaleFactor( tj, pScaleMask, &pScaleValue );
#ifdef AMOEBA_DEBUG
dScaleVal = dScaleValue;
pScaleVal = pScaleValue;
#endif
calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[tj], dScaleValue, pScaleValue, ijField
#ifdef AMOEBA_DEBUG
, pullBack
#endif
);
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum[0] += ijField[0][0];
fieldSum[1] += ijField[0][1];
fieldSum[2] += ijField[0][2];
fieldPolarSum[0] += ijField[2][0];
fieldPolarSum[1] += ijField[2][1];
fieldPolarSum[2] += ijField[2][2];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA[tj].eField[0] += ijField[1][0];
psA[tj].eField[1] += ijField[1][1];
psA[tj].eField[2] += ijField[1][2];
psA[tj].eFieldP[0] += ijField[3][0];
psA[tj].eFieldP[1] += ijField[3][1];
psA[tj].eFieldP[2] += ijField[3][2];
#ifdef AMOEBA_DEBUG
if( (atomI == targetAtom || (y + tj) == targetAtom) ){
unsigned int index = (atomI == targetAtom) ? (y + tj) : atomI;
unsigned int indexI = (atomI == targetAtom) ? 0 : 2;
unsigned int indexJ = (atomI == targetAtom) ? 2 : 0;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
debugArray[index].z = dScaleVal;
debugArray[index].w = pScaleVal;
unsigned int pullBackIndex = 0;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;;
pullBackIndex++;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;;
float flag = 9.0f;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI][0];
debugArray[index].y = ijField[indexI][1];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0];
debugArray[index].y = ijField[indexJ][1];
debugArray[index].z = ijField[indexJ][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
}
#endif
tj = (tj + 1) & (GRID - 1);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, fieldSum, outputEField );
load3dArrayBufferPerWarp( offset, fieldPolarSum, outputEFieldPolar );
offset = 3*(y + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eField, outputEField );
load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#else
unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, fieldSum, outputEField );
load3dArray( offset, fieldPolarSum, outputEFieldPolar );
offset = 3*(y + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, sA[threadIdx.x].eField, outputEField );
load3dArray( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#endif
lasty = y;
}
pos++;
}
}
......@@ -1027,6 +1027,9 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
(void) fflush( amoebaGpu->log );
#endif
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaBindTexture(NULL, &tabulatedErfcRef, gpu->psTabulatedErfc->_pDevData, &channelDesc, gpu->psTabulatedErfc->_length*sizeof(float));
kCalculateAmoebaCudaRealSpaceEwaldN2Forces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(RealSpaceEwaldParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
......
......@@ -368,8 +368,12 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaFixedEAndGkFields( amoebaGpu );
cudaComputeAmoebaMutualInducedAndGkField( amoebaGpu );
} else {
cudaComputeAmoebaFixedEField( amoebaGpu );
cudaComputeAmoebaMutualInducedField( amoebaGpu );
if( amoebaGpu->multipoleNonbondedMethod == AMOEBA_NO_CUTOFF ){
cudaComputeAmoebaFixedEField( amoebaGpu );
cudaComputeAmoebaMutualInducedField( amoebaGpu );
} else {
cudaComputeAmoebaPmeFixedEField( amoebaGpu );
}
}
// check if induce dipole calculation converged -- abort if it did not
......
......@@ -1989,6 +1989,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
int usePme = 0;
double aewald = 0.0;
double cutoffDistance = 0.0;
double box[3] = { 10.0, 10.0, 10.0 };
// usePme, aewald, cutoffDistance added w/ Version 1
......@@ -1996,7 +1997,11 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
usePme = atoi( tokens[2].c_str() );
aewald = atof( tokens[3].c_str() );
cutoffDistance = atof( tokens[4].c_str() );
box[0] = atof( tokens[5].c_str() );
box[1] = atof( tokens[6].c_str() );
box[2] = atof( tokens[7].c_str() );
}
if( usePme ){
multipoleForce->setNonbondedMethod( AmoebaMultipoleForce::PME );
} else {
......@@ -2004,6 +2009,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
}
multipoleForce->setAEwald( aewald );
multipoleForce->setCutoffDistance( cutoffDistance );
system.setDefaultPeriodicBoxVectors( Vec3(box[0], 0.0, 0.0), Vec3(0.0, box[1], 0.0), Vec3(0.0, 0.0, box[2]) );
if( log ){
(void) fprintf( log, "%s number of MultipoleParameter terms=%d usePme=%d aewald=%15.7e cutoffDistance=%12.4f\n",
methodName.c_str(), numberOfMultipoles, usePme, aewald, cutoffDistance );
......@@ -2106,10 +2112,26 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
double polarityConversion = AngstromToNm*AngstromToNm*AngstromToNm;
double dampingFactorConversion = sqrt( AngstromToNm );
float scalingDistanceCutoff = static_cast<float>(multipoleForce->getScalingDistanceCutoff());
scalingDistanceCutoff *= static_cast<float>(AngstromToNm);
multipoleForce->setScalingDistanceCutoff( scalingDistanceCutoff );
multipoleForce->setAEwald( multipoleForce->getAEwald()/AngstromToNm);
multipoleForce->setCutoffDistance( multipoleForce->getCutoffDistance()*AngstromToNm );
multipoleForce->setScalingDistanceCutoff( multipoleForce->getScalingDistanceCutoff()*AngstromToNm );
Vec3 a,b,c;
system.getDefaultPeriodicBoxVectors( a, b, c);
a[0] *= AngstromToNm;
a[1] *= AngstromToNm;
a[2] *= AngstromToNm;
b[0] *= AngstromToNm;
b[1] *= AngstromToNm;
b[2] *= AngstromToNm;
c[0] *= AngstromToNm;
c[1] *= AngstromToNm;
c[2] *= AngstromToNm;
system.setDefaultPeriodicBoxVectors( a, b, c);
for( int ii = 0; ii < multipoleForce->getNumMultipoles(); ii++ ){
......@@ -2144,6 +2166,15 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
(void) fprintf( log, "%s Sample of parameters using %s units.\n", methodName.c_str(),
(useOpenMMUnits ? "OpenMM" : "Amoeba") );
std::string nonbondedMethod = multipoleForce->getNonbondedMethod( ) == AmoebaMultipoleForce::PME ? "PME" : "NoCutoff";
(void) fprintf( log, "NonbondedMethod=%s aEwald=%15.7e cutoff=%15.7e.\n", nonbondedMethod.c_str(),
multipoleForce->getAEwald(), multipoleForce->getCutoffDistance() );
Vec3 a,b,c;
system.getDefaultPeriodicBoxVectors( a, b, c );
(void) fprintf( log, "Box=[%12.3f %12.3f %12.3f] [%12.3f %12.3f %12.3f] [%12.3f %12.3f %12.3f]\n",
a[0], a[1], a[2], b[0], b[1], b[2], c[0], c[1], c[2] );
(void) fprintf( log, "Supplementary fields %u: ", static_cast<unsigned int>(supplementary.size()) );
for( MapStringVectorOfVectorsCI ii = supplementary.begin(); ii != supplementary.end(); ii++ ){
(void) fprintf( log, "%s ", (*ii).first.c_str() );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment