Commit b1dc92cc authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Added support for polarization type='direct' option

WCA force now written directly to cuda.force4 array
parent 959f2fab
......@@ -66,6 +66,19 @@ public:
PME = 1
};
enum AmoebaPolarizationType {
/**
* Mutual polarization
*/
Mutual = 0,
/**
* Direct polarization
*/
Direct = 1
};
enum MultipoleAxisTypes { ZThenX = 0, Bisector = 1, ZBisect = 2, ThreeFold = 3, ZOnly = 4, NoAxisType = 5, LastAxisTypeIndex = 6 };
// Algorithm used to converge mutual induced dipoles:
......@@ -100,6 +113,16 @@ public:
*/
void setNonbondedMethod(AmoebaNonbondedMethod method);
/**
* Get polarization type
*/
AmoebaPolarizationType getPolarizationType( void ) const;
/**
* Set the polarization type
*/
void setPolarizationType(AmoebaPolarizationType type );
/**
* Get the cutoff distance (in nm) being used for nonbonded interactions. If the NonbondedMethod in use
* is NoCutoff, this value will have no effect.
......@@ -326,6 +349,7 @@ protected:
private:
AmoebaNonbondedMethod nonbondedMethod;
AmoebaPolarizationType polarizationType;
double cutoffDistance;
double aewald;
int pmeBSplineOrder;
......
......@@ -38,7 +38,7 @@ using namespace OpenMM;
using std::string;
using std::vector;
AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), pmeBSplineOrder(5), cutoffDistance(1.0), ewaldErrorTol(1e-4), mutualInducedIterationMethod(SOR), mutualInducedMaxIterations(60),
AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), polarizationType(Mutual), pmeBSplineOrder(5), cutoffDistance(1.0), ewaldErrorTol(1e-4), mutualInducedIterationMethod(SOR), mutualInducedMaxIterations(60),
mutualInducedTargetEpsilon(1.0e-05), scalingDistanceCutoff(100.0), electricConstant(138.9354558456) {
}
......@@ -50,6 +50,14 @@ void AmoebaMultipoleForce::setNonbondedMethod( AmoebaMultipoleForce::AmoebaNonbo
nonbondedMethod = method;
}
AmoebaMultipoleForce::AmoebaPolarizationType AmoebaMultipoleForce::getPolarizationType( void ) const {
return polarizationType;
}
void AmoebaMultipoleForce::setPolarizationType( AmoebaMultipoleForce::AmoebaPolarizationType type ) {
polarizationType = type;
}
double AmoebaMultipoleForce::getCutoffDistance( void ) const {
return cutoffDistance;
}
......
......@@ -940,8 +940,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
}
}
int polarizationType = static_cast<int>(force.getPolarizationType());
int iterativeMethod = static_cast<int>(force.getMutualInducedIterationMethod());
if( iterativeMethod != 0 ){
if( iterativeMethod != 0 && polarizationType == 0 ){
throw OpenMMException("Iterative method for mutual induced dipoles not recognized.\n");
}
......@@ -950,13 +951,17 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
throw OpenMMException("AmoebaMultipoleForce nonbonded method not recognized.\n");
}
if( polarizationType != 0 && polarizationType != 1 ){
throw OpenMMException("AmoebaMultipoleForce polarization type not recognized.\n");
}
gpuSetAmoebaMultipoleParameters(data.getAmoebaGpu(), charges, dipoles, quadrupoles, axisTypes, multipoleAtomZs, multipoleAtomXs, multipoleAtomYs,
tholes, scalingDistanceCutoff, dampingFactors, polarity,
multipoleAtomCovalentInfo, covalentDegree, minCovalentIndices, minCovalentPolarizationIndices, (maxCovalentRange+2),
static_cast<int>(force.getMutualInducedIterationMethod()),
force.getMutualInducedMaxIterations(),
static_cast<float>( force.getMutualInducedTargetEpsilon()),
nonbondedMethod,
nonbondedMethod, polarizationType,
static_cast<float>( force.getCutoffDistance()),
static_cast<float>( force.getAEwald()),
static_cast<float>( force.getElectricConstant()) );
......
......@@ -358,6 +358,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
gpuPrintCudaStreamFloat( amoebaGpu->psLabFrameDipole, log );
gpuPrintCudaStreamFloat( amoebaGpu->psLabFrameQuadrupole, log );
(void) fprintf( log, " polarizationType %d\n", amoebaGpu->amoebaSim.polarizationType );
(void) fprintf( log, " maxCovalentDegreeSz %d\n", amoebaGpu->maxCovalentDegreeSz );
(void) fprintf( log, " solventDielectric %10.3f\n", amoebaGpu->solventDielectric);
(void) fprintf( log, " scalingDistanceCutoff %15.7e\n", amoebaGpu->amoebaSim.scalingDistanceCutoff );
......@@ -1519,7 +1520,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const std::vector< std::vector< std::vector<int> > >& multipoleParticleCovalentInfo, const std::vector<int>& covalentDegree,
const std::vector<int>& minCovalentIndices, const std::vector<int>& minCovalentPolarizationIndices, int maxCovalentRange,
int mutualInducedIterativeMethod, int mutualInducedMaxIterations, float mutualInducedTargetEpsilon,
int nonbondedMethod, float cutoffDistance, float alphaEwald, float electricConstant ) {
int nonbondedMethod, int polarizationType, float cutoffDistance, float alphaEwald, float electricConstant ) {
// ---------------------------------------------------------------------------------------
......@@ -1545,6 +1546,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// allocate memory
amoebaGpu->mutualInducedIterativeMethod = mutualInducedIterativeMethod;
amoebaGpu->amoebaSim.polarizationType = polarizationType;
gpuMutualInducedFieldAllocate( amoebaGpu );
amoebaGpu->mutualInducedMaxIterations = mutualInducedMaxIterations;
amoebaGpu->mutualInducedTargetEpsilon = mutualInducedTargetEpsilon;
......@@ -1562,9 +1564,9 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s Nonbonded method=%d %d [NoCutoff=%d PME=%d]\n",
(void) fprintf( amoebaGpu->log,"%s Nonbonded method=%d %d [NoCutoff=%d PME=%d] polarizationType=%d (0=mutual/1=direct)\n",
methodName.c_str(), nonbondedMethod, amoebaGpu->multipoleNonbondedMethod,
AMOEBA_NO_CUTOFF, AMOEBA_PARTICLE_MESH_EWALD );
AMOEBA_NO_CUTOFF, AMOEBA_PARTICLE_MESH_EWALD, polarizationType );
(void) fflush( amoebaGpu->log );
}
amoebaGpu->amoebaSim.sqrtPi = std::sqrt( 3.14159265358f );
......@@ -2554,6 +2556,7 @@ void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
unsigned int particles = radii.size();
amoebaGpu->psWcaDispersionRadiusEpsilon = new CUDAStream<float2>(paddedNumberOfAtoms, 1, "WcaDispersionRadiusEpsilon");
amoebaGpu->amoebaSim.pWcaDispersionRadiusEpsilon = amoebaGpu->psWcaDispersionRadiusEpsilon->_pDevData;
for (unsigned int ii = 0; ii < particles; ii++)
{
amoebaGpu->psWcaDispersionRadiusEpsilon->_pSysData[ii].x = radii[ii];
......
......@@ -136,6 +136,7 @@ struct cudaAmoebaGmxSimulation {
float sqrtPi; // sqrt(PI)
float scalingDistanceCutoff; // scaling cutoff
float2* pDampingFactorAndThole; // Thole & damping factors
int polarizationType; // polarization type (0=Mutual, 1=Direct)
int4* pMultipoleParticlesIdsAndAxisType;
int4* pMultipoleParticlesTorqueBufferIndices;
......@@ -181,8 +182,8 @@ struct cudaAmoebaGmxSimulation {
float awater;
float shctd;
float dispoff;
float totalMaxWcaDispersionEnergy;
float2* pWcaDispersionRadiusEpsilon;
// scaling indices
int* pScaleIndicesIndex;
......
......@@ -293,7 +293,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const std::vector< std::vector< std::vector<int> > >& multipoleAtomCovalentInfo, const std::vector<int>& covalentDegree,
const std::vector<int>& minCovalentIndices, const std::vector<int>& minCovalentPolarizationIndices, int maxCovalentRange,
int mutualInducedIterationMethod, int mutualInducedMaxIterations, float mutualInducedTargetEpsilon,
int nonbondedMethod, float cutoffDistance, float alphaEwald, float electricConstant );
int nonbondedMethod, int polarizationType, float cutoffDistance, float alphaEwald, float electricConstant );
extern "C"
......
......@@ -388,23 +388,6 @@ if( 1 ){
amatrixProductVector3(atomJ.labFrameQuadrupole, atomI.inducedDipole, temp5);
amatrixProductVector3(atomI.labFrameQuadrupole, atomJ.inducedDipole , qIuJ);//MK
/*
ftm2i(1) = gfi(1)*xr + 0.5d0*
& (- rr3*ck*(uind(1,i)*psc3+uinp(1,i)*dsc3)
& + rr5*sc(4)*(uind(1,i)*psc5+uinp(1,i)*dsc5)
& - rr7*sc(6)*(uind(1,i)*psc7+uinp(1,i)*dsc7))
& + (rr3*ci*(uind(1,k)*psc3+uinp(1,k)*dsc3)
& + rr5*sc(3)*(uind(1,k)*psc5+uinp(1,k)*dsc5)
& + rr7*sc(5)*(uind(1,k)*psc7+uinp(1,k)*dsc7))*0.5d0
& + rr5*scale5i*(sci(4)*uinp(1,i)+scip(4)*uind(1,i)
& + sci(3)*uinp(1,k)+scip(3)*uind(1,k))*0.5d0
& + 0.5d0*(sci(4)*psc5+scip(4)*dsc5)*rr5*di(1)
& + 0.5d0*(sci(3)*psc5+scip(3)*dsc5)*rr5*dk(1)
& + 0.5d0*gfi(4)*((qkui(1)-qiuk(1))*psc5
& + (qkuip(1)-qiukp(1))*dsc5)
& + gfi(5)*qir(1) + gfi(6)*qkr(1)
*/
float ftm2i_0 = gfi1*deltaR[0] +
0.5f*(-rr3*atomJ.q*(atomI.inducedDipole[0]*psc0 + atomI.inducedDipoleP[0]*dsc0) +
rr5*sc4*(atomI.inducedDipole[0]*psc1 + atomI.inducedDipoleP[0]*dsc1) -
......@@ -460,6 +443,18 @@ if( 1 ){
ftm2i_1 -= (fridmp_1 + findmp_1);
ftm2i_2 -= (fridmp_2 + findmp_2);
if( cAmoebaSim.polarizationType )
{
float gfd = 0.5*(rr5*scip2*scaleI0 - rr7*(scip3*sci4+sci3*scip4)*scaleI1);
float temp5 = 0.5*rr5*scaleI1;
float fdir_0 = gfd*deltaR[0] + temp5*(sci4*atomI.inducedDipoleP[0] + scip4*atomI.inducedDipole[0] + sci3*atomJ.inducedDipoleP[0] + scip3*atomJ.inducedDipole[0]);
float fdir_1 = gfd*deltaR[1] + temp5*(sci4*atomI.inducedDipoleP[1] + scip4*atomI.inducedDipole[1] + sci3*atomJ.inducedDipoleP[1] + scip3*atomJ.inducedDipole[1]);
float fdir_2 = gfd*deltaR[2] + temp5*(sci4*atomI.inducedDipoleP[2] + scip4*atomI.inducedDipole[2] + sci3*atomJ.inducedDipoleP[2] + scip3*atomJ.inducedDipole[2]);
ftm2i_0 -= fdir_0 - findmp_0;
ftm2i_1 -= fdir_1 - findmp_1;
ftm2i_2 -= fdir_2 - findmp_2;
}
// now perform the torque calculation;
// intermediate terms for torque between multipoles i and j;
......
......@@ -1311,7 +1311,7 @@ __device__ void calculateKirkwoodPairIxn_kernel( KirkwoodParticle& atomI,
// mutual polarization electrostatic solvation free energy gradient
// if (poltyp .eq. 'MUTUAL'){
if( cAmoebaSim.polarizationType == 0 ){
dpdx = dpdx - 0.5f *
(atomI.inducedDipole[0]*(atomJ.inducedDipoleP[0]*gux5+atomJ.inducedDipoleP[1]*gux6+atomJ.inducedDipoleP[2]*gux7)
......@@ -1345,7 +1345,7 @@ __device__ void calculateKirkwoodPairIxn_kernel( KirkwoodParticle& atomI,
+ atomJ.inducedDipole[2]*(atomI.inducedDipoleP[0]*guz22+atomI.inducedDipoleP[1]*guz23+atomI.inducedDipoleP[2]*guz24);
dpbi = dpbi - 0.5f*atomJ.bornRadius*duvdr;
dpbk = dpbk - 0.5f*atomI.bornRadius*duvdr;
// }
}
// torque due to induced reaction field on permanent dipoles
......
......@@ -515,24 +515,16 @@ __device__ void calculateKirkwoodEDiffPairIxn_kernel( KirkwoodEDiffParticle& ato
// correction to convert mutual to direct polarization force
#if 0
if (poltyp .eq. 'DIRECT') then;
gfd = 0.5f * (rr5*scip2*scale3i;
& - rr7*(scip3*sci4+sci3*scip4)*scale5i);
fdir1 = gfd*xr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipolePS[0]+scip4*atomI.inducedDipoleS[0];
& +sci3*atomJ.inducedDipolePS[0]+scip3*atomJ.inducedDipoleS[0]);
fdir2 = gfd*yr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipolePS[1]+scip4*atomI.inducedDipoleS[1];
& +sci3*atomJ.inducedDipolePS[1]+scip3*atomJ.inducedDipoleS[1]);
fdir3 = gfd*zr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipolePS[2]+scip4*atomI.inducedDipoleS[2];
& +sci3*atomJ.inducedDipolePS[2]+scip3*atomJ.inducedDipoleS[2]);
ftm2i1 = ftm2i1 - fdir1 + findmp1;
ftm2i2 = ftm2i2 - fdir2 + findmp2;
ftm2i3 = ftm2i3 - fdir3 + findmp3;
end if;
#endif
if ( cAmoebaSim.polarizationType ){
float gfd = 0.5f * (rr5*scip2*scale3i - rr7*(scip3*sci4+sci3*scip4)*scale5i);
float fdir1 = gfd*xr + 0.5f*rr5*scale5i* (sci4*atomI.inducedDipolePS[0]+scip4*atomI.inducedDipoleS[0] + sci3*atomJ.inducedDipolePS[0]+scip3*atomJ.inducedDipoleS[0]);
float fdir2 = gfd*yr + 0.5f*rr5*scale5i* (sci4*atomI.inducedDipolePS[1]+scip4*atomI.inducedDipoleS[1] + sci3*atomJ.inducedDipolePS[1]+scip3*atomJ.inducedDipoleS[1]);
float fdir3 = gfd*zr + 0.5f*rr5*scale5i* (sci4*atomI.inducedDipolePS[2]+scip4*atomI.inducedDipoleS[2] + sci3*atomJ.inducedDipolePS[2]+scip3*atomJ.inducedDipoleS[2]);
ftm2i1 -= fdir1 - findmp1;
ftm2i2 -= fdir2 - findmp2;
ftm2i3 -= fdir3 - findmp3;
}
// now perform the torque calculation
// intermediate terms for torque between multipoles i and k
......@@ -803,24 +795,16 @@ __device__ void calculateKirkwoodEDiffPairIxn_kernel( KirkwoodEDiffParticle& ato
// correction to convert mutual to direct polarization force;
#if 0
if (poltyp .eq. 'DIRECT') then;
gfd = 0.5f * (rr5*scip2*scale3i;
& - rr7*(scip3*sci4+sci3*scip4)*scale5i);
fdir1 = gfd*xr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0];
& +sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
fdir2 = gfd*yr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1];
& +sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
fdir3 = gfd*zr + 0.5f*rr5*scale5i;
& * (sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2];
& +sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
ftm2i1 = ftm2i1 - fdir1 + findmp1;
ftm2i2 = ftm2i2 - fdir2 + findmp2;
ftm2i3 = ftm2i3 - fdir3 + findmp3;
end if;
#endif
if ( cAmoebaSim.polarizationType ){
float gfd = 0.5f * (rr5*scip2*scale3i- rr7*(scip3*sci4+sci3*scip4)*scale5i);
float fdir1 = gfd*xr + 0.5f*rr5*scale5i* (sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0] + sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
float fdir2 = gfd*yr + 0.5f*rr5*scale5i* (sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1] + sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
float fdir3 = gfd*zr + 0.5f*rr5*scale5i* (sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2] + sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
ftm2i1 -= fdir1 - findmp1;
ftm2i2 -= fdir2 - findmp2;
ftm2i3 -= fdir3 - findmp3;
}
// now perform the torque calculation
// intermediate terms for torque between multipoles i and k
......
......@@ -784,6 +784,15 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
}
#endif
// if polarization type is direct, set flags signalling done and return
if( amoebaGpu->amoebaSim.polarizationType )
{
amoebaGpu->mutualInducedDone = 1;
amoebaGpu->mutualInducedConverged = 1;
return;
}
// ---------------------------------------------------------------------------------------
done = 0;
......
......@@ -532,6 +532,15 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
}
#endif
// if polarization type is direct, set flags signalling done and return
if( amoebaGpu->amoebaSim.polarizationType )
{
amoebaGpu->mutualInducedDone = 1;
amoebaGpu->mutualInducedConverged = 1;
return;
}
// ---------------------------------------------------------------------------------------
done = 0;
......
......@@ -911,9 +911,16 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
int j2 = deriv2[k+1];
int j3 = deriv3[k+1];
f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1] + inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1];
f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2] + inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2];
f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3] + inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3];
f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1];
f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2];
f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3];
if( cAmoebaSim.polarizationType == 0 )
{
f.x += inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1];
f.y += inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2];
f.z += inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3];
}
}
......
......@@ -704,36 +704,22 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// correction to convert mutual to direct polarization force
/*
if (poltyp .eq. 'DIRECT') {
gfd = 0.5f * (bn2*scip2;
& - bn3*(scip3*sci4+sci3*scip4));
gfdr = 0.5f * (rr5*scip2*usc3;
& - rr7*(scip3*sci4;
& +sci3*scip4)*usc5);
ftm2i1 = ftm2i1 - gfd*xr - 0.5f*bn2*;
& (sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0];
& +sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
ftm2i2 = ftm2i2 - gfd*yr - 0.5f*bn2*;
& (sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1];
& +sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
ftm2i3 = ftm2i3 - gfd*zr - 0.5f*bn2*;
& (sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2];
& +sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
fdir1 = gfdr*xr + 0.5f*usc5*rr5*;
& (sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0];
& + sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
fdir2 = gfdr*yr + 0.5f*usc5*rr5*;
& (sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1];
& + sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
fdir3 = gfdr*zr + 0.5f*usc5*rr5*;
& (sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2];
& + sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
ftm2i1 = ftm2i1 + fdir1 + findmp1;
ftm2i2 = ftm2i2 + fdir2 + findmp2;
ftm2i3 = ftm2i3 + fdir3 + findmp3;
if( cAmoebaSim.polarizationType ){
float gfd = 0.5f * (bn2*scip2 - bn3*(scip3*sci4+sci3*scip4));
ftm2i1 -= gfd*xr + 0.5f*bn2*(sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0]+sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
ftm2i2 -= gfd*yr + 0.5f*bn2*(sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1]+sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
ftm2i3 -= gfd*zr + 0.5f*bn2*(sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2]+sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
float gfdr = 0.5f * (rr5*scip2*usc3 - rr7*(scip3*sci4 +sci3*scip4)*usc5);
float fdir1 = gfdr*xr + 0.5f*usc5*rr5*(sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0] + sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0]);
float fdir2 = gfdr*yr + 0.5f*usc5*rr5*(sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1] + sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1]);
float fdir3 = gfdr*zr + 0.5f*usc5*rr5*(sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2] + sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2]);
ftm2i1 += fdir1 + findmp1;
ftm2i2 += fdir2 + findmp2;
ftm2i3 += fdir3 + findmp3;
}
*/
// intermediate variables for induced torque terms
......@@ -1198,6 +1184,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{
cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu );
kCalculateAmoebaPMEInducedDipoleForces( amoebaGpu );
cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpu, amoebaGpu->psTorque );
......
......@@ -572,44 +572,25 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaWriteVectorOfDoubleVectorsToFile( "CudaEFieldPolarity", fileId, outputVector );
/*
amoebaGpu->psE_FieldPolar->Download();
amoebaGpu->psInducedDipole->Download(),
amoebaGpu->psInducedDipolePolar->Download();
amoebaGpu->psPolarizability->Download();
(void) fprintf( amoebaGpu->log, "%s Initial setup for matrix multiply\n", methodName );
int offset = 0;
int maxPrint = 10;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d pol=%12.4e ", ii,
amoebaGpu->psPolarizability->_pSysData[offset] );
if( amoebaGpu->psPolarizability->_pSysData[offset] != amoebaGpu->psPolarizability->_pSysData[offset+1] ||
amoebaGpu->psPolarizability->_pSysData[offset] != amoebaGpu->psPolarizability->_pSysData[offset+2] ){
(void) fprintf( amoebaGpu->log, "PolX!!! %12.4e %12.4e ", amoebaGpu->psPolarizability->_pSysData[offset+1], amoebaGpu->psPolarizability->_pSysData[offset+2] );
}
(void) fprintf( amoebaGpu->log," E[%14.6e %14.6e %14.6e] Mi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psE_Field->_pSysData[offset], amoebaGpu->psE_Field->_pSysData[offset+1], amoebaGpu->psE_Field->_pSysData[offset+2],
amoebaGpu->psInducedDipole->_pSysData[offset], amoebaGpu->psInducedDipole->_pSysData[offset+1], amoebaGpu->psInducedDipole->_pSysData[offset+2] );
(void) fprintf( amoebaGpu->log,"Ep[%14.6e %14.6e %14.6e] Mip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psE_FieldPolar->_pSysData[offset], amoebaGpu->psE_FieldPolar->_pSysData[offset+1], amoebaGpu->psE_FieldPolar->_pSysData[offset+2],
amoebaGpu->psInducedDipolePolar->_pSysData[offset], amoebaGpu->psInducedDipolePolar->_pSysData[offset+1], amoebaGpu->psInducedDipolePolar->_pSysData[offset+2] );
offset += 3;
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) )ii = (gpu->natoms - maxPrint);
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeEFieldPolarity", fileId, outputVector );
}
#endif
void) fflush( amoebaGpu->log );
*/
// if polarization type is direct, set flags signalling done and return
if( amoebaGpu->amoebaSim.polarizationType )
{
amoebaGpu->mutualInducedDone = 1;
amoebaGpu->mutualInducedConverged = 1;
kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
return;
}
#endif
// ---------------------------------------------------------------------------------------
......@@ -624,35 +605,6 @@ void) fflush( amoebaGpu->log );
kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n");
#ifdef GET_EFIELD_FROM_FILE
{
std::string fileName = "waterInduceRecip.txt";
StringVectorVector fileContents;
readFile( fileName, fileContents );
unsigned int offset = 0;
amoebaGpu->psWorkVector[0]->Download();
amoebaGpu->psWorkVector[1]->Download();
if( amoebaGpu->log )(void) fprintf( amoebaGpu->log, "Read file: %s %u\n", fileName.c_str(), fileContents.size() ); fflush( amoebaGpu->log );
float conversion = 100.0f;
for( unsigned int ii = 1; ii < fileContents.size()-1; ii++ ){
StringVector lineTokens = fileContents[ii];
unsigned int lineTokenIndex = 1;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu->psWorkVector[0]->_pSysData[offset++] += conversion*static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psWorkVector[0]->_pSysData[offset++] += conversion*static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psWorkVector[0]->_pSysData[offset++] += conversion*static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
offset -= 3;
amoebaGpu->psWorkVector[1]->_pSysData[offset++] += conversion*static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psWorkVector[1]->_pSysData[offset++] += conversion*static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psWorkVector[1]->_pSysData[offset++] += conversion*static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
}
amoebaGpu->psWorkVector[0]->Upload();
amoebaGpu->psWorkVector[1]->Upload();
}
#endif
// post matrix multiply
kSorUpdateMutualInducedField_kernel<<< numBlocks, numThreads >>>(
......
......@@ -353,28 +353,6 @@ __device__ void calculateWcaDispersionPairIxn_kernel( float4 atomCoordinatesI, f
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaWcaDispersion.h"
// reduce psWorkArray_3_1 -> outputArray
static void kReduceWcaDispersion(amoebaGpuContext amoebaGpu, CUDAStream<float>* outputArray )
{
gpuContext gpu = amoebaGpu->gpuContext;
kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevData, outputArray->_pDevData, 0 );
LAUNCHERROR("kReduceWcaDispersion");
}
// reduce psWorkArray_3_1 -> outputArray
static void kReduceWcaDispersionToFloat4(amoebaGpuContext amoebaGpu, CUDAStream<float4>* outputArray )
{
gpuContext gpu = amoebaGpu->gpuContext;
kReduceFieldsToFloat4_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevData, outputArray->_pDevData );
LAUNCHERROR("kReduceWcaDispersion");
}
/**---------------------------------------------------------------------------------------
Compute WCA dispersion
......@@ -394,15 +372,6 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
gpuContext gpu = amoebaGpu->gpuContext;
#ifdef AMOEBA_DEBUG
static const char* methodName = "kCalculateAmoebaWcaDispersionForces";
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysData, 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload();
int targetAtom = 3;
#endif
// set threads/block first time through
if( threadsPerBlock == 0 ){
......@@ -427,222 +396,17 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
#endif
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaWcaDispersionN2ByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(WcaDispersionParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevData,
gpu->psPosq4->_pDevData,
amoebaGpu->psWcaDispersionRadiusEpsilon->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_1->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_1->_pDevData );
#endif
amoebaGpu->psWorkUnit->_pDevData );
} else {
kCalculateAmoebaWcaDispersionN2_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(WcaDispersionParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevData,
gpu->psPosq4->_pDevData,
amoebaGpu->psWcaDispersionRadiusEpsilon->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_1->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_1->_pDevData );
#endif
amoebaGpu->psWorkUnit->_pDevData );
}
LAUNCHERROR("kCalculateAmoebaWcaDispersion");
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
amoebaGpu->psWorkArray_3_1->Download();
debugArray->Download();
(void) fprintf( amoebaGpu->log,"\n" );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
double sum = 0.0;
double sums[8] = { 0.0, 0.0, 0.0, 0.0,0.0, 0.0, 0.0, 0.0 };
std::map<int,double> buffers;
std::vector< std::vector<double> > trackD;
std::vector< std::vector<double> > trackT;
std::vector<double> maxD;
std::vector< std::vector<int> > trackI;
trackD.resize( gpu->natoms );
trackT.resize( gpu->natoms );
maxD.resize( gpu->natoms );
trackI.resize( gpu->natoms );
for( int jj = 0; jj < gpu->natoms; jj++ ){
unsigned int debugIndex = jj;
(void) fprintf( amoebaGpu->log,"%5d %5d DebugWca\n", targetAtom, jj );
int block = -1;
for( int kk = 0; kk < -3; kk++ ){
if( kk == 1 ){
block = static_cast<int>(debugArray->_pSysData[debugIndex].w + 1.0e-04);
if( buffers.find(block) == buffers.end() ){
buffers[block] = 0.0;
}
}
if( kk == 1 && jj != targetAtom ){
sums[0] += debugArray->_pSysData[debugIndex].y;
sums[1] += debugArray->_pSysData[debugIndex].z;
sums[2] += debugArray->_pSysData[debugIndex].w;
double x4 = debugArray->_pSysData[debugIndex].x - (debugArray->_pSysData[debugIndex].y + debugArray->_pSysData[debugIndex].z + debugArray->_pSysData[debugIndex].w);
sums[3] += x4;
//sum += debugArray->_pSysData[debugIndex].x;
sum += debugArray->_pSysData[debugIndex].z;
buffers[block] += debugArray->_pSysData[debugIndex].z;
(void) fprintf( amoebaGpu->log," %16.9e [%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w, x4);
} else if( kk == 2 && jj != targetAtom){
//sum += debugArray->_pSysData[debugIndex].x;
sum += debugArray->_pSysData[debugIndex].z;
sums[4] += debugArray->_pSysData[debugIndex].y;
sums[5] += debugArray->_pSysData[debugIndex].z;
sums[6] += debugArray->_pSysData[debugIndex].w;
double x4 = debugArray->_pSysData[debugIndex].x - (debugArray->_pSysData[debugIndex].y + debugArray->_pSysData[debugIndex].z + debugArray->_pSysData[debugIndex].w);
sums[7] += x4;
buffers[block] += debugArray->_pSysData[debugIndex].z;
(void) fprintf( amoebaGpu->log," %16.9e [%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w, x4);
} else {
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e] %7u\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w, debugIndex );
}
if( kk == 5 )(void) fprintf( amoebaGpu->log,"\n" );
debugIndex += paddedNumberOfAtoms;
}
block = static_cast<int>(debugArray->_pSysData[debugIndex+paddedNumberOfAtoms].w + 1.0e-04);
if( buffers.find(block) == buffers.end() ){
buffers[block] = 0.0;
maxD[block] = 0.0;
}
for( int kk = 0; kk < 3; kk++ ){
if( kk == 0 && jj != targetAtom ){
sum += debugArray->_pSysData[debugIndex].z;
buffers[block] += debugArray->_pSysData[debugIndex].z;
trackI[block].push_back( jj );
trackD[block].push_back( debugArray->_pSysData[debugIndex].z );
trackT[block].push_back( debugArray->_pSysData[debugIndex].w );
if( fabs( debugArray->_pSysData[debugIndex].w ) > maxD[block] ){
maxD[block] = fabs( debugArray->_pSysData[debugIndex].w );
}
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w);
} else if( kk == 2 && jj != targetAtom){
sum += debugArray->_pSysData[debugIndex].z;
buffers[block] += debugArray->_pSysData[debugIndex].z;
trackI[block].push_back( jj );
trackD[block].push_back( debugArray->_pSysData[debugIndex].z );
trackT[block].push_back( debugArray->_pSysData[debugIndex].w );
if( fabs( debugArray->_pSysData[debugIndex].w ) > maxD[block] ){
maxD[block] = fabs( debugArray->_pSysData[debugIndex].w );
}
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w);
} else {
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e] %7u\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w, debugIndex );
}
if( kk == 5 )(void) fprintf( amoebaGpu->log,"\n" );
debugIndex += paddedNumberOfAtoms;
}
(void) fprintf( amoebaGpu->log,"\n" );
}
(void) fprintf( amoebaGpu->log,"Total sum=%14.7e\n", sum );
(void) fprintf( amoebaGpu->log,"DeWW\n" );
sum = 0.0;
for( int jj = 0; jj < 4; jj++ ){
sum += sums[jj] + sums[jj+4];
(void) fprintf( amoebaGpu->log,"[%14.7e %14.7e] %14.7e\n", sums[jj], sums[jj+4], sums[jj] + sums[jj+4] );
}
(void) fprintf( amoebaGpu->log,"Total sum8=%14.7e\n", sum );
(void) fprintf( amoebaGpu->log,"Buffers\n", sum );
sum = 0.0;
for( std::map<int,double>::const_iterator jj = buffers.begin(); jj != buffers.end(); jj++ ){
(void) fprintf( amoebaGpu->log,"%5d %14.7e", jj->first, jj->second );
sum += jj->second;
int block = jj->first;
double sumBlock = 0.0;
for( unsigned int kk = 0; kk < trackI[block].size(); kk++ ){
sumBlock += trackD[block][kk];
}
(void) fprintf( amoebaGpu->log," %5u Total=%14.7e MaxD=%14.7e\n", trackI[block].size(), sumBlock, maxD[block]);
for( unsigned int kk = 0; kk < trackI[block].size(); kk++ ){
(void) fprintf( amoebaGpu->log,"[%5d %14.7e %14.7e] ", trackI[block][kk], trackD[block][kk], trackT[block][kk]);
if( ((kk+1)%3) == 0 )(void) fprintf( amoebaGpu->log,"\n" );
}
(void) fprintf( amoebaGpu->log,"\n\n\n" );
}
(void) fprintf( amoebaGpu->log,"Total buffer sum=%14.7e\n", sum );
/*
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
int debugIndex4 = debugIndex + 4*paddedNumberOfAtoms;
int debugIndex5 = debugIndex + 5*paddedNumberOfAtoms;
int debugIndex9 = debugIndex + 9*paddedNumberOfAtoms;
int debugIndex10 = debugIndex + 10*paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"%6d %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e %14.7e \n", jj,
debugArray->_pSysData[debugIndex4].x*debugArray->_pSysData[debugIndex4].x, // r2
debugArray->_pSysData[debugIndex4].y, debugArray->_pSysData[debugIndex9].y, // de
debugArray->_pSysData[debugIndex5].x, debugArray->_pSysData[debugIndex10].x, // dll
debugArray->_pSysData[debugIndex5].y, debugArray->_pSysData[debugIndex10].y, // duu
debugArray->_pSysData[debugIndex4].z, debugArray->_pSysData[debugIndex9].z ); // emxio
}
*/
}
#endif
kReduceWcaDispersionToFloat4( amoebaGpu, gpu->psForce4 );
#ifdef AMOEBA_DEBUG
if( 0 && amoebaGpu->log ){
gpu->psEnergy->Download();
double sum = 0.0;
for (int i = 0; i < gpu->sim.energyOutputBuffers; i++){
sum += gpu->psEnergy->_pSysData[i];
if( fabsf( (*gpu->psEnergy)[i]) > 0.0 )
(void) fprintf( amoebaGpu->log,"SumQQ %6d %14.7e QQ SUM\n", i, (*gpu->psEnergy)[i] );
}
(void) fprintf( amoebaGpu->log,"%14.7e QQ SUM\n", sum );
}
#endif
if( 0 ){
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float>* psTempForce = new CUDAStream<float>(4*paddedNumberOfAtoms, 1, "psTempForce");
kClearFloat( amoebaGpu, 4*paddedNumberOfAtoms, psTempForce );
kReduceWcaDispersion( amoebaGpu, psTempForce );
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, psTempForce, outputVector, NULL, 1.0f );
cudaWriteVectorOfDoubleVectorsToFile( "CudaWca", fileId, outputVector );
delete psTempForce;
//exit(0);
}
#ifdef AMOEBA_DEBUG
delete debugArray;
//exit(0);
#endif
// ---------------------------------------------------------------------------------------
}
......@@ -32,15 +32,8 @@ __launch_bounds__(192, 1)
#else
__launch_bounds__(64, 1)
#endif
void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
unsigned int* workUnit,
float4* atomCoord,
float2* wcaDispersionParameters,
float* outputForce
#ifdef AMOEBA_DEBUG
, float4* debugArray, unsigned int targetAtom
#endif
){
void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)( unsigned int* workUnit ){
extern __shared__ WcaDispersionParticle sA[];
......@@ -56,9 +49,6 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
float jEpsilon;
float totalEnergy = 0.0f;
#ifdef AMOEBA_DEBUG
float4 pullDebug[3];
#endif
while (pos < end)
{
......@@ -76,9 +66,9 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
WcaDispersionParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
float4 iCoord = atomCoord[atomI];
float iRadius = wcaDispersionParameters[atomI].x;
float iEpsilon = wcaDispersionParameters[atomI].y;
float4 iCoord = cSim.pPosq[atomI];
float iRadius = cAmoebaSim.pWcaDispersionRadiusEpsilon[atomI].x;
float iEpsilon = cAmoebaSim.pWcaDispersionRadiusEpsilon[atomI].y;
float forceSum[3];
......@@ -100,7 +90,7 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
if (lasty != y)
{
loadWcaDispersionShared( &(sA[threadIdx.x]), (y+tgx), atomCoord, wcaDispersionParameters );
loadWcaDispersionShared( &(sA[threadIdx.x]), (y+tgx), cSim.pPosq, cAmoebaSim.pWcaDispersionRadiusEpsilon );
}
......@@ -124,66 +114,8 @@ void METHOD_NAME(kCalculateAmoebaWcaDispersion, _kernel)(
iRadius,jRadius,
rmixo, rmixh,
emixo, emixh,
ijForce, &energy
#ifdef AMOEBA_DEBUG
, pullDebug
#endif
);
#ifdef AMOEBA_DEBUG
if( (atomI == targetAtom) || ( (y+tj) == targetAtom ) ){
unsigned int index = (atomI == targetAtom) ? (y + tj) : atomI;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
//debugArray[index].z = (float) cSim.atoms;
debugArray[index].z = atomI == (y+tj) ? 0.0f : energy;
energy = ( (atomI != (y+tj)) && (atomI < cSim.atoms) && ((y+tj) < cSim.atoms) ) ? (energy) : 0.0f;
debugArray[index].w = energy+totalEnergy;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = iCoord.x;
debugArray[index].y = iCoord.y;
debugArray[index].z = iCoord.z;
debugArray[index].w = (float) (blockIdx.x * blockDim.x + threadIdx.x);
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = jCoord.x;
debugArray[index].y = jCoord.y;
debugArray[index].z = jCoord.z;
debugArray[index].w = -4.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = emixo;
debugArray[index].y = emixh;
debugArray[index].z = rmixo;
debugArray[index].w = rmixh;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[0].x;
debugArray[index].y = pullDebug[0].y;
debugArray[index].z = pullDebug[0].z;
debugArray[index].w = pullDebug[0].w;
#if 0
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[1].x;
debugArray[index].y = pullDebug[1].y;
debugArray[index].z = pullDebug[1].z;
debugArray[index].w = pullDebug[1].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[2].x;
debugArray[index].y = pullDebug[2].y;
debugArray[index].z = pullDebug[2].z;
debugArray[index].w = pullDebug[2].w;
#endif
ijForce, &energy);
} else {
// energy = 0.0f;
}
#endif
if( (atomI != (y+tj)) && (atomI < cSim.atoms) && ((y+tj) < cSim.atoms) ){
// add to field at atomI the field due atomJ's dipole
......@@ -209,54 +141,8 @@ debugArray[index].w = pullDebug[2].w;
jRadius,iRadius,
rmjxo, rmjxh,
emjxo, emjxh,
ijForce, &energy
#ifdef AMOEBA_DEBUG
, pullDebug
#endif
);
#ifdef AMOEBA_DEBUG
if( (atomI == targetAtom) || ( (y+tj) == targetAtom ) ){
unsigned int index = (atomI == targetAtom) ? (y + tj) : atomI;
index += 2*cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
debugArray[index].z = atomI == (y+tj) ? 0.0f : energy;
energy = ( (atomI != (y+tj)) && (atomI < cSim.atoms) && ((y+tj) < cSim.atoms) ) ? (energy) : 0.0f;
debugArray[index].w = energy+totalEnergy;
//debugArray[index].w = -2.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = emjxo;
debugArray[index].y = emjxh;
debugArray[index].z = rmjxo;
debugArray[index].w = rmjxh;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[0].x;
debugArray[index].y = pullDebug[0].y;
debugArray[index].z = pullDebug[0].z;
debugArray[index].w = pullDebug[0].w;
#if 0
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[1].x;
debugArray[index].y = pullDebug[1].y;
debugArray[index].z = pullDebug[1].z;
debugArray[index].w = pullDebug[1].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[2].x;
debugArray[index].y = pullDebug[2].y;
debugArray[index].z = pullDebug[2].z;
debugArray[index].w = pullDebug[2].w;
#endif
ijForce, &energy);
} else {
//energy = 0.0f;
}
#endif
if( (atomI != (y+tj)) && (atomI < cSim.atoms) && ((y+tj) < cSim.atoms) ){
// add to field at atomI the field due atomJ's dipole
......@@ -281,24 +167,32 @@ debugArray[index].w = pullDebug[2].w;
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, forceSum, outputForce );
unsigned int offset = (x + tgx + warp*cSim.paddedNumberOfAtoms);
add3dArrayToFloat4( offset, forceSum, cSim.pForce4);
offset = 3*(y + tgx + warp*cSim.paddedNumberOfAtoms);
// include diagonal only once
load3dArrayBufferPerWarp( offset, sA[threadIdx.x].force, outputForce );
if( x != y ){
offset = (y + tgx + warp*cSim.paddedNumberOfAtoms);
add3dArrayToFloat4( offset, sA[threadIdx.x].force, cSim.pForce4);
}
#else
unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
load3dArray( offset, forceSum, outputForce );
unsigned int offset = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
add3dArrayToFloat4( offset, forceSum, cSim.pForce4);
offset = 3*(y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
load3dArray( offset, sA[threadIdx.x].force, outputForce );
// include diagonal only once
if( x != y ){
offset = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
add3dArrayToFloat4( offset, sA[threadIdx.x].force, cSim.pForce4 );
}
#endif
lasty = y;
pos++;
}
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] -= cAmoebaSim.awater*totalEnergy;
if( (blockIdx.x*blockDim.x + threadIdx.x) == 0 ){
cSim.pEnergy[0] += cAmoebaSim.totalMaxWcaDispersionEnergy;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment