Commit 2ef05285 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Direct space PME

parent c22f00cf
......@@ -38,8 +38,7 @@ ENDIF(LOG)
# The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
#SET(OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi platforms/reference)
SET(OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi )
SET(OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS . openmmapi platforms/reference)
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
......@@ -103,8 +102,10 @@ FOREACH(subdir ${OPENMM_AMOEBA_PLUGIN_SOURCE_SUBDIRS})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
ENDFOREACH(subdir)
#INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src)
#INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src/SimTKReference)
INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src)
INCLUDE_DIRECTORIES(BEFORE ${OPENMM_DIR}/platforms/reference/src/SimTKReference)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/platforms/reference/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/platforms/reference/src/SimTKReference)
# ----------------------------------------------------------------------------
IF(LOG)
......@@ -158,7 +159,7 @@ IF(OPENMM_BUILD_STATIC_LIB)
TARGET_LINK_LIBRARIES( ${STATIC_AMOEBA_TARGET} ${STATIC_TARGET} )
ENDIF(OPENMM_BUILD_STATIC_LIB)
#ADD_SUBDIRECTORY(platforms/reference/tests)
ADD_SUBDIRECTORY(platforms/reference/tests)
# Which hardware platforms to build
IF(CUDA_FOUND)
......
......@@ -113,6 +113,20 @@ public:
*/
void setCutoffDistance(double distance);
/**
* Get the aEwald parameter
*
* @return the Ewald parameter
*/
double getAEwald() const;
/**
* Set the aEwald parameter
*
* @param Ewald parameter
*/
void setAEwald(double aewald);
/**
* Add multipole-related info for a particle
*
......@@ -279,6 +293,7 @@ private:
AmoebaNonbondedMethod nonbondedMethod;
double cutoffDistance;
double aewald;
MutualInducedIterationMethod mutualInducedIterationMethod;
int mutualInducedMaxIterations;
double mutualInducedTargetEpsilon;
......
......@@ -56,6 +56,14 @@ void AmoebaMultipoleForce::setCutoffDistance(double distance) {
cutoffDistance = distance;
}
double AmoebaMultipoleForce::getAEwald() const {
return aewald;
}
void AmoebaMultipoleForce::setAEwald(double inputAewald ) {
aewald = inputAewald;
}
AmoebaMultipoleForce::MutualInducedIterationMethod AmoebaMultipoleForce::getMutualInducedIterationMethod( void ) const {
return mutualInducedIterationMethod;
}
......
......@@ -589,6 +589,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
static_cast<float>( force.getMutualInducedTargetEpsilon()),
nonbondedMethod,
static_cast<float>( force.getCutoffDistance()),
static_cast<float>( force.getAEwald()),
static_cast<float>( force.getElectricConstant()) );
if (nonbondedMethod == AmoebaMultipoleForce::PME) {
double alpha;
......
......@@ -350,6 +350,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, " pP_ScaleIndices %p\n", amoebaGpu->amoebaSim.pP_ScaleIndices );
(void) fprintf( log, " pM_ScaleIndices %p\n", amoebaGpu->amoebaSim.pM_ScaleIndices );
(void) fprintf( log, " sqrtPi %15.7e\n", amoebaGpu->amoebaSim.sqrtPi );
(void) fprintf( log, " alpha Ewald %15.7e\n", gpu->sim.alphaEwald );
(void) fprintf( log, " cutoffDistance2 %15.7e\n", amoebaGpu->amoebaSim.cutoffDistance2 );
(void) fprintf( log, " electric %15.7e\n", amoebaGpu->amoebaSim.electric );
(void) fprintf( log, " box %15.7e %15.7e %15.7e\n", gpu->sim.periodicBoxSizeX, gpu->sim.periodicBoxSizeY, gpu->sim.periodicBoxSizeZ);
......@@ -1463,7 +1464,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const std::vector< std::vector< std::vector<int> > >& multipoleParticleCovalentInfo, const std::vector<int>& covalentDegree,
const std::vector<int>& minCovalentIndices, const std::vector<int>& minCovalentPolarizationIndices, int maxCovalentRange,
int mutualInducedIterativeMethod, int mutualInducedMaxIterations, float mutualInducedTargetEpsilon,
int nonbondedMethod, float cutoffDistance, float electricConstant ){
int nonbondedMethod, float cutoffDistance, float alphaEwald, float electricConstant ){
// ---------------------------------------------------------------------------------------
......@@ -1565,9 +1566,8 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu->amoebaSim.cutoffDistance2 = cutoffDistance*cutoffDistance;
amoebaGpu->amoebaSim.sqrtPi = sqrt( 3.1415926535897932384626433832795 );
amoebaGpu->amoebaSim.electric = electricConstant;
amoebaGpu->gpuContext->sim.alphaEwald = alphaEwald;
amoebaGpu->gpuContext->sim.nonbondedCutoff = cutoffDistance;
tabulateErfc(amoebaGpu->gpuContext);
if( amoebaGpu->amoebaSim.dielec < 1.0e-05 ){
amoebaGpu->amoebaSim.dielec = 1.0f;
......@@ -2766,7 +2766,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaPmeMutualInducedFieldSim( amoebaGpu );
SetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpu );
SetCalculateAmoebaElectrostaticSim( amoebaGpu );
SetCalculateAmoebaRealSpaceEwaldSim( amoebaGpu );
SetCalculateAmoebaPmeDirectElectrostaticSim( amoebaGpu );
SetCalculateAmoebaCudaMapTorquesSim( amoebaGpu );
SetCalculateAmoebaKirkwoodSim( amoebaGpu );
SetCalculateAmoebaKirkwoodEDiffSim( amoebaGpu );
......
......@@ -106,9 +106,9 @@ extern void SetCalculateAmoebaElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu );
extern void SetCalculateAmoebaRealSpaceEwaldSim( amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaRealSpaceEwaldSim( amoebaGpuContext amoebaGpu );
extern void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu );
extern void SetCalculateAmoebaPmeDirectElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaPmeDirectElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu );
extern void SetCalculateAmoebaCudaMapTorquesSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaMapTorquesSim(amoebaGpuContext gpu);
......
......@@ -307,7 +307,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
const std::vector< std::vector< std::vector<int> > >& multipoleAtomCovalentInfo, const std::vector<int>& covalentDegree,
const std::vector<int>& minCovalentIndices, const std::vector<int>& minCovalentPolarizationIndices, int maxCovalentRange,
int mutualInducedIterationMethod, int mutualInducedMaxIterations, float mutualInducedTargetEpsilon,
int nonbondedMethod, float cutoffDistance, float electricConstant );
int nonbondedMethod, float cutoffDistance, float alphaEwald, float electricConstant );
extern "C"
......
......@@ -11,34 +11,24 @@
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
void SetCalculateAmoebaRealSpaceEwaldSim(amoebaGpuContext amoebaGpu)
void SetCalculateAmoebaPmeDirectElectrostaticSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaRealSpaceEwaldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
RTERROR(status, "SetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaRealSpaceEwaldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
RTERROR(status, "SetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
void GetCalculateAmoebaRealSpaceEwaldSim(amoebaGpuContext amoebaGpu)
void GetCalculateAmoebaPmeDirectElectrostaticSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaRealSpaceEwaldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
RTERROR(status, "GetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaRealSpaceEwaldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
texture<float, 1, cudaReadModeElementType> tabulatedErfcRef;
static __device__ float fastErfc(float r)
{
float normalized = cSim.tabulatedErfcScale*r;
int index = (int) normalized;
float fract2 = normalized-index;
float fract1 = 1.0f-fract2;
return fract1*tex1Dfetch(tabulatedErfcRef, index) + fract2*tex1Dfetch(tabulatedErfcRef, index+1);
RTERROR(status, "GetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
static int const PScaleIndex = 0;
......@@ -60,7 +50,7 @@ static int const MScaleIndex = 3;
//static int const Ddsc72Index = 16;
static int const LastScalingIndex = 4;
struct RealSpaceEwaldParticle {
struct PmeDirectElectrostaticParticle {
// coordinates charge
......@@ -97,9 +87,50 @@ struct RealSpaceEwaldParticle {
};
__device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& atomI, RealSpaceEwaldParticle& atomJ,
float scalingDistanceCutoff, float* scalingFactors,
float* outputForce, float outputTorque[2][3],
// self-energy for PME
__device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, float* energy)
{
float term = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
float fterm = -(cAmoebaSim.electric/cAmoebaSim.dielec)*cSim.alphaEwald/cAmoebaSim.sqrtPi;
float cii = atomI.q*atomI.q;
float dii = atomI.labFrameDipole[0]*atomI.labFrameDipole[0] +
atomI.labFrameDipole[1]*atomI.labFrameDipole[1] +
atomI.labFrameDipole[2]*atomI.labFrameDipole[2];
float qii = atomI.labFrameQuadrupole[0]*atomI.labFrameQuadrupole[0] +
atomI.labFrameQuadrupole[4]*atomI.labFrameQuadrupole[4] +
atomI.labFrameQuadrupole[8]*atomI.labFrameQuadrupole[8] + 2.0f*(
atomI.labFrameQuadrupole[1]*atomI.labFrameQuadrupole[1] +
atomI.labFrameQuadrupole[2]*atomI.labFrameQuadrupole[2] +
atomI.labFrameQuadrupole[5]*atomI.labFrameQuadrupole[5]);
float uii = atomI.labFrameDipole[0]*atomI.inducedDipole[0] + atomI.labFrameDipole[1]*atomI.inducedDipole[1] + atomI.labFrameDipole[2]*atomI.inducedDipole[2];
*energy = (cii + term*(dii/3.0f + 2.0f*term*qii/5.0f));
*energy += term*uii/3.0f;
*energy *= fterm;
}
// self-torque for PME
__device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI)
{
float term = (4.0f/3.0f)*(cAmoebaSim.electric/cAmoebaSim.dielec)*(cSim.alphaEwald*cSim.alphaEwald*cSim.alphaEwald)/cAmoebaSim.sqrtPi;
float uix = 0.5f*(atomI.inducedDipole[0] + atomI.inducedDipoleP[0]);
float uiy = 0.5f*(atomI.inducedDipole[1] + atomI.inducedDipoleP[1]);
float uiz = 0.5f*(atomI.inducedDipole[2] + atomI.inducedDipoleP[2]);
atomI.torque[0] += term*(atomI.labFrameDipole[1]*uiz - atomI.labFrameDipole[2]*uiy);
atomI.torque[1] += term*(atomI.labFrameDipole[2]*uix - atomI.labFrameDipole[0]*uiz);
atomI.torque[2] += term*(atomI.labFrameDipole[0]*uiy - atomI.labFrameDipole[1]*uix);
}
__device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ,
float* scalingFactors, float* outputForce, float outputTorque[2][3],
float* energy
#ifdef AMOEBA_DEBUG
,float4* debugArray
......@@ -107,20 +138,8 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
){
float e,ei,f;
//float gfd,gfdr;
//float xix,yix,zix;
//float xiy,yiy,ziy;
//float xiz,yiz,ziz;
//float xkx,ykx,zkx;
//float xky,yky,zky;
//float xkz,ykz,zkz;
//float rr1,rr3;
//float rr5,rr7,rr9,rr11;
float e,ei;
float erl,erli;
//float frcxi[4],frcxk[4];
//float frcyi[4],frcyk[4];
//float frczi[4],frczk[4];
float di[4],qi[10];
float dk[4],qk[10];
float fridmp[4],findmp[4];
......@@ -130,7 +149,6 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float ttm2i[4],ttm3i[4];
float ttm2r[4],ttm3r[4];
float ttm2ri[4],ttm3ri[4];
//float fdir[4]
float dixdk[4];
float dkxui[4],dixuk[4];
float dixukp[4],dkxuip[4];
......@@ -159,7 +177,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float gfr[8],gfri[7];
float gti[7],gtri[7];
f = (cAmoebaSim.electric/cAmoebaSim.dielec);
float conversionFactor = (cAmoebaSim.electric/cAmoebaSim.dielec);
// set the permanent multipole and induced dipole values;
......@@ -193,6 +211,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float r2 = xr*xr + yr*yr + zr*zr;
if( r2 <= cAmoebaSim.cutoffDistance2 ){
float r = sqrt(r2);
float ck = atomJ.q;
......@@ -214,7 +233,7 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float ralpha = cSim.alphaEwald*r;
bn[0] = fastErfc(ralpha)/r;
bn[0] = erfc(ralpha)/r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
float alsq2n = 0.0f;
......@@ -250,17 +269,17 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
float scale5 = 1.0f;
float scale7 = 1.0f;
ddsc3[0] = 0.0f;
ddsc3[1] = 0.0f;
ddsc3[2] = 0.0f;
ddsc3[3] = 0.0f;
ddsc5[0] = 0.0f;
ddsc5[1] = 0.0f;
ddsc5[2] = 0.0f;
ddsc5[3] = 0.0f;
ddsc7[0] = 0.0f;
ddsc7[1] = 0.0f;
ddsc7[2] = 0.0f;
ddsc7[3] = 0.0f;
float pdk = atomJ.damp;
float ptk = atomJ.thole;
......@@ -280,9 +299,11 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
ddsc3[1] = temp3 * xr;
ddsc3[2] = temp3 * yr;
ddsc3[3] = temp3 * zr;
ddsc5[1] = temp5 * ddsc3[1];
ddsc5[2] = temp5 * ddsc3[2];
ddsc5[3] = temp5 * ddsc3[3];
ddsc7[1] = temp7 * ddsc5[1];
ddsc7[2] = temp7 * ddsc5[2];
ddsc7[3] = temp7 * ddsc5[3];
......@@ -508,8 +529,8 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
+ rr7*gli[3]*psc7);
e = e - (1.0f-scalingFactors[MScaleIndex])*erl;
ei = ei - erli;
e = f * e;
ei = f * ei;
e = conversionFactor * e;
ei = conversionFactor * ei;
// em = em + e;
// ep = ep + ei;
......@@ -679,12 +700,9 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
+(glip[2]+glip[7])*scalingFactors[DScaleIndex]);
float temp7 = 0.5f * rr7 * (gli[3]*scalingFactors[PScaleIndex]
+glip[3]*scalingFactors[DScaleIndex]);
fridmp[1] = temp3*ddsc3[1] + temp5*ddsc5[1]
+ temp7*ddsc7[1];
fridmp[2] = temp3*ddsc3[2] + temp5*ddsc5[2]
+ temp7*ddsc7[2];
fridmp[3] = temp3*ddsc3[3] + temp5*ddsc5[3]
+ temp7*ddsc7[3];
fridmp[1] = temp3*ddsc3[1] + temp5*ddsc5[1] + temp7*ddsc7[1];
fridmp[2] = temp3*ddsc3[2] + temp5*ddsc5[2] + temp7*ddsc7[2];
fridmp[3] = temp3*ddsc3[3] + temp5*ddsc5[3] + temp7*ddsc7[3];
// find some scaling terms for induced-induced force
......@@ -833,34 +851,138 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
// handle the case where scaling is used
for( int j = 1; j <= 3; j++ ){
ftm2[j] = f * (ftm2[j]-(1.0f-scalingFactors[MScaleIndex])*ftm2r[j]);
ftm2i[j] = f * (ftm2i[j]-ftm2ri[j]);
ttm2[j] = f * (ttm2[j]-(1.0f-scalingFactors[MScaleIndex])*ttm2r[j]);
ttm2i[j] = f * (ttm2i[j]-ttm2ri[j]);
ttm3[j] = f * (ttm3[j]-(1.0f-scalingFactors[MScaleIndex])*ttm3r[j]);
ttm3i[j] = f * (ttm3i[j]-ttm3ri[j]);
ftm2[j] = (ftm2[j]-(1.0f-scalingFactors[MScaleIndex])*ftm2r[j]);
ftm2i[j] = (ftm2i[j]-ftm2ri[j]);
ttm2[j] = (ttm2[j]-(1.0f-scalingFactors[MScaleIndex])*ttm2r[j]);
ttm2i[j] = (ttm2i[j]-ttm2ri[j]);
ttm3[j] = (ttm3[j]-(1.0f-scalingFactors[MScaleIndex])*ttm3r[j]);
ttm3i[j] = (ttm3i[j]-ttm3ri[j]);
}
// increment gradient due to force and torque on first site;
outputForce[0] = -conversionFactor*(ftm2[1] + ftm2i[1]);
outputForce[1] = -conversionFactor*(ftm2[2] + ftm2i[2]);
outputForce[2] = -conversionFactor*(ftm2[3] + ftm2i[3]);
outputTorque[0][0] = conversionFactor*(ttm2[1] + ttm2i[1]);
outputTorque[0][1] = conversionFactor*(ttm2[2] + ttm2i[2]);
outputTorque[0][2] = conversionFactor*(ttm2[3] + ttm2i[3]);
outputTorque[1][0] = conversionFactor*(ttm3[1] + ttm3i[1]);
outputTorque[1][1] = conversionFactor*(ttm3[2] + ttm3i[2]);
outputTorque[1][2] = conversionFactor*(ttm3[3] + ttm3i[3]);
//outputTorque[1][2] = conversionFactor*(ttm3_2 + ttm3i_2);
#ifdef AMOEBA_DEBUG
int debugIndex = 0;
float idTracker = 1.0f;
/*
dem(1,ii) = dem(1,ii) + ftm2[1];
dem(2,ii) = dem(2,ii) + ftm2[2];
dem(3,ii) = dem(3,ii) + ftm2[3];
dep(1,ii) = dep(1,ii) + ftm2i[1];
dep(2,ii) = dep(2,ii) + ftm2i[2];
dep(3,ii) = dep(3,ii) + ftm2i[3];
call torque (i,ttm2,ttm2i,frcxi,frcyi,frczi);
// increment gradient due to force and torque on second site;
dem(1,kk) = dem(1,kk) - ftm2[1];
dem(2,kk) = dem(2,kk) - ftm2[2];
dem(3,kk) = dem(3,kk) - ftm2[3];
dep(1,kk) = dep(1,kk) - ftm2i[1];
dep(2,kk) = dep(2,kk) - ftm2i[2];
dep(3,kk) = dep(3,kk) - ftm2i[3];
call torque (k,ttm3,ttm3i,frcxk,frcyk,frczk);
debugArray[debugIndex].x = atomI.labFrameDipole[0];
debugArray[debugIndex].y = atomI.labFrameDipole[1];
debugArray[debugIndex].z = atomI.labFrameDipole[2];
debugArray[debugIndex].w = r2;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.labFrameDipole[0];
debugArray[debugIndex].y = atomJ.labFrameDipole[1];
debugArray[debugIndex].z = atomJ.labFrameDipole[2];
debugArray[debugIndex].w = cSim.alphaEwald;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipole[0];
debugArray[debugIndex].y = atomI.inducedDipole[1];
debugArray[debugIndex].z = atomI.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipole[0];
debugArray[debugIndex].y = atomJ.inducedDipole[1];
debugArray[debugIndex].z = atomJ.inducedDipole[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomI.inducedDipoleP[0];
debugArray[debugIndex].y = atomI.inducedDipoleP[1];
debugArray[debugIndex].z = atomI.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = atomJ.inducedDipoleP[0];
debugArray[debugIndex].y = atomJ.inducedDipoleP[1];
debugArray[debugIndex].z = atomJ.inducedDipoleP[2];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm2[1];
debugArray[debugIndex].y = conversionFactor*ftm2[2];
debugArray[debugIndex].z = conversionFactor*ftm2[3];
debugArray[debugIndex].w = idTracker;
debugIndex++;
*/
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ftm2i[1];
debugArray[debugIndex].y = conversionFactor*ftm2i[2];
debugArray[debugIndex].z = conversionFactor*ftm2i[3];
debugArray[debugIndex].w = r2;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*fridmp[1];
debugArray[debugIndex].y = conversionFactor*fridmp[2];
debugArray[debugIndex].z = conversionFactor*fridmp[3];
debugArray[debugIndex].w = cSim.alphaEwald;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*findmp[1];
debugArray[debugIndex].y = conversionFactor*findmp[2];
debugArray[debugIndex].z = conversionFactor*findmp[3];
debugArray[debugIndex].w = cSim.alphaEwald + 1.0f;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ttm2[1];
debugArray[debugIndex].y = conversionFactor*ttm2[2];
debugArray[debugIndex].z = conversionFactor*ttm2[3];
debugArray[debugIndex].w = idTracker;
debugIndex++;
idTracker += 1.0;
debugArray[debugIndex].x = conversionFactor*ttm2i[1];
debugArray[debugIndex].y = conversionFactor*ttm2i[2];
debugArray[debugIndex].z = conversionFactor*ttm2i[3];
debugArray[debugIndex].w = idTracker;
#endif
} else {
outputForce[0] = 0.0f;
outputForce[1] = 0.0f;
outputForce[2] = 0.0f;
outputTorque[0][0] = 0.0f;
outputTorque[0][1] = 0.0f;
outputTorque[0][2] = 0.0f;
outputTorque[1][0] = 0.0f;
outputTorque[1][1] = 0.0f;
outputTorque[1][2] = 0.0f;
#ifdef AMOEBA_DEBUG
for( int ii = 0; ii < 20; ii++ ){
debugArray[ii].x = 0.0f;
debugArray[ii].y = 0.0f;
debugArray[ii].z = 0.0f;
debugArray[ii].w = (float) ii;
}
#endif
}
......@@ -868,49 +990,47 @@ __device__ void calculateRealSpaceEwaldPairIxn_kernel( RealSpaceEwaldParticle& a
}
__device__ void loadRealSpaceEwaldShared( struct RealSpaceEwaldParticle* sA, unsigned int atomI,
float4* atomCoord, float* labFrameDipoleJ, float* labQuadrupole,
float* inducedDipole, float* inducedDipolePolar, float2* dampingFactorAndThole )
__device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticParticle* sA, unsigned int atomI )
{
// coordinates & charge
sA->x = atomCoord[atomI].x;
sA->y = atomCoord[atomI].y;
sA->z = atomCoord[atomI].z;
sA->q = atomCoord[atomI].w;
sA->x = cSim.pPosq[atomI].x;
sA->y = cSim.pPosq[atomI].y;
sA->z = cSim.pPosq[atomI].z;
sA->q = cSim.pPosq[atomI].w;
// lab dipole
sA->labFrameDipole[0] = labFrameDipoleJ[atomI*3];
sA->labFrameDipole[1] = labFrameDipoleJ[atomI*3+1];
sA->labFrameDipole[2] = labFrameDipoleJ[atomI*3+2];
sA->labFrameDipole[0] = cAmoebaSim.pLabFrameDipole[atomI*3];
sA->labFrameDipole[1] = cAmoebaSim.pLabFrameDipole[atomI*3+1];
sA->labFrameDipole[2] = cAmoebaSim.pLabFrameDipole[atomI*3+2];
// lab quadrupole
sA->labFrameQuadrupole[0] = labQuadrupole[atomI*9];
sA->labFrameQuadrupole[1] = labQuadrupole[atomI*9+1];
sA->labFrameQuadrupole[2] = labQuadrupole[atomI*9+2];
sA->labFrameQuadrupole[3] = labQuadrupole[atomI*9+3];
sA->labFrameQuadrupole[4] = labQuadrupole[atomI*9+4];
sA->labFrameQuadrupole[5] = labQuadrupole[atomI*9+5];
sA->labFrameQuadrupole[6] = labQuadrupole[atomI*9+6];
sA->labFrameQuadrupole[7] = labQuadrupole[atomI*9+7];
sA->labFrameQuadrupole[8] = labQuadrupole[atomI*9+8];
sA->labFrameQuadrupole[0] = cAmoebaSim.pLabFrameQuadrupole[atomI*9];
sA->labFrameQuadrupole[1] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+1];
sA->labFrameQuadrupole[2] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+2];
sA->labFrameQuadrupole[3] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+3];
sA->labFrameQuadrupole[4] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+4];
sA->labFrameQuadrupole[5] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+5];
sA->labFrameQuadrupole[6] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+6];
sA->labFrameQuadrupole[7] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+7];
sA->labFrameQuadrupole[8] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+8];
// induced dipole
sA->inducedDipole[0] = inducedDipole[atomI*3];
sA->inducedDipole[1] = inducedDipole[atomI*3+1];
sA->inducedDipole[2] = inducedDipole[atomI*3+2];
sA->inducedDipole[0] = cAmoebaSim.pInducedDipole[atomI*3];
sA->inducedDipole[1] = cAmoebaSim.pInducedDipole[atomI*3+1];
sA->inducedDipole[2] = cAmoebaSim.pInducedDipole[atomI*3+2];
// induced dipole polar
sA->inducedDipoleP[0] = inducedDipolePolar[atomI*3];
sA->inducedDipoleP[1] = inducedDipolePolar[atomI*3+1];
sA->inducedDipoleP[2] = inducedDipolePolar[atomI*3+2];
sA->inducedDipoleP[0] = cAmoebaSim.pInducedDipolePolar[atomI*3];
sA->inducedDipoleP[1] = cAmoebaSim.pInducedDipolePolar[atomI*3+1];
sA->inducedDipoleP[2] = cAmoebaSim.pInducedDipolePolar[atomI*3+2];
sA->damp = dampingFactorAndThole[atomI].x;
sA->thole = dampingFactorAndThole[atomI].y;
sA->damp = cAmoebaSim.pDampingFactorAndThole[atomI].x;
sA->thole = cAmoebaSim.pDampingFactorAndThole[atomI].y;
}
......@@ -918,11 +1038,11 @@ __device__ void loadRealSpaceEwaldShared( struct RealSpaceEwaldParticle* sA, uns
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaRealSpaceEwald.h"
#include "kCalculateAmoebaCudaPmeDirectElectrostatic.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaRealSpaceEwald.h"
#include "kCalculateAmoebaCudaPmeDirectElectrostatic.h"
// reduce psWorkArray_3_1 -> force
// reduce psWorkArray_3_2 -> torque
......@@ -932,23 +1052,27 @@ static void kReduceForceTorque(amoebaGpuContext amoebaGpu )
kReduceFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>(
amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psForce->_pDevStream[0] );
LAUNCHERROR("kReduceRealSpaceEwaldForce");
LAUNCHERROR("kReducePmeDirectElectrostaticForce");
kReduceFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>(
amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers,
amoebaGpu->psWorkArray_3_2->_pDevStream[0], amoebaGpu->psTorque->_pDevStream[0] );
LAUNCHERROR("kReduceRealSpaceEwaldTorque");
LAUNCHERROR("kReducePmeDirectElectrostaticTorque");
}
//#define GET_INDUCED_DIPOLE_FROM_FILE
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
#include <stdlib.h>
#endif
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque
Compute Amoeba dirrect space portion of electrostatic force & torque
@param amoebaGpu amoebaGpu context
@param gpu OpenMM gpu Cuda context
--------------------------------------------------------------------------------------- */
void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
{
// ---------------------------------------------------------------------------------------
......@@ -956,7 +1080,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
static unsigned int threadsPerBlock = 0;
#ifdef AMOEBA_DEBUG
static const char* methodName = "cudaComputeAmoebaRealSpaceEwald";
static const char* methodName = "cudaComputeAmoebaPmeDirectElectrostatic";
static int timestep = 0;
std::vector<int> fileId;
timestep++;
......@@ -983,7 +1107,38 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload();
unsigned int targetAtom = 0;
unsigned int targetAtom = 10;
#endif
#ifdef GET_INDUCED_DIPOLE_FROM_FILE
std::string fileName = "waterInducedDipole.txt";
StringVectorVector fileContents;
readFile( fileName, fileContents );
unsigned int offset = 0;
(void) fprintf( amoebaGpu->log, "Read file: %s %u\n", fileName.c_str(), fileContents.size() ); fflush( amoebaGpu->log );
for( unsigned int ii = 1; ii < fileContents.size()-1; ii++ ){
StringVector lineTokens = fileContents[ii];
unsigned int lineTokenIndex = 1;
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu->psInducedDipole->_pSysStream[0][offset++] = static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psInducedDipole->_pSysStream[0][offset++] = static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psInducedDipole->_pSysStream[0][offset++] = static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
offset -= 3;
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset++] = static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset++] = static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset++] = static_cast<float>(atof(lineTokens[lineTokenIndex++].c_str()));
}
float conversion = 0.1f;
for( int ii = 0; ii < 3*gpu->natoms; ii++ ){
amoebaGpu->psInducedDipole->_pSysStream[0][ii] *= conversion;
amoebaGpu->psInducedDipolePolar->_pSysStream[0][ii] *= conversion;
}
amoebaGpu->gpuContext->sim.alphaEwald = 5.4459052e+00f;
SetCalculateAmoebaPmeDirectElectrostaticSim(amoebaGpu);
amoebaGpu->psInducedDipole->Upload();
amoebaGpu->psInducedDipolePolar->Upload();
#endif
// on first pass, set threads/block
......@@ -996,20 +1151,15 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
maxThreads = 128;
else
maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(RealSpaceEwaldParticle)), maxThreads);
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(PmeDirectElectrostaticParticle)), maxThreads);
}
kClearFields_3( amoebaGpu, 2 );
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaCudaRealSpaceEwaldN2ByWarpForces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(RealSpaceEwaldParticle)*threadsPerBlock>>>(
kCalculateAmoebaPmeDirectElectrostaticN2ByWarpForces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(PmeDirectElectrostaticParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psLabFrameDipole->_pDevStream[0],
amoebaGpu->psLabFrameQuadrupole->_pDevStream[0],
amoebaGpu->psInducedDipole->_pDevStream[0],
amoebaGpu->psInducedDipolePolar->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0],
......@@ -1021,22 +1171,14 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
} else {
#ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaRealSpaceEwaldN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaPmeDirectElectrostaticN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(RealSpaceEwaldParticle), sizeof(RealSpaceEwaldParticle)*threadsPerBlock, amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
sizeof(PmeDirectElectrostaticParticle), sizeof(PmeDirectElectrostaticParticle)*threadsPerBlock, amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
#endif
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaBindTexture(NULL, &tabulatedErfcRef, gpu->psTabulatedErfc->_pDevData, &channelDesc, gpu->psTabulatedErfc->_length*sizeof(float));
kCalculateAmoebaCudaRealSpaceEwaldN2Forces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(RealSpaceEwaldParticle)*threadsPerBlock>>>(
kCalculateAmoebaPmeDirectElectrostaticN2Forces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(PmeDirectElectrostaticParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psLabFrameDipole->_pDevStream[0],
amoebaGpu->psLabFrameQuadrupole->_pDevStream[0],
amoebaGpu->psInducedDipole->_pDevStream[0],
amoebaGpu->psInducedDipolePolar->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevStream[0],
......@@ -1045,7 +1187,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif
}
LAUNCHERROR("kCalculateAmoebaCudaRealSpaceEwaldN2Forces");
LAUNCHERROR("kCalculateAmoebaPmeDirectElectrostaticN2Forces");
kReduceForceTorque( amoebaGpu );
#ifdef AMOEBA_DEBUG
......@@ -1055,9 +1197,10 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
amoebaGpu->psTorque->Download();
debugArray->Download();
(void) fprintf( amoebaGpu->log, "Finished RealSpaceEwald kernel execution\n" ); (void) fflush( amoebaGpu->log );
(void) fprintf( amoebaGpu->log, "Finished PmeDirectElectrostatic kernel execution\n" ); (void) fflush( amoebaGpu->log );
int maxPrint = 1400;
float conversion = 1.0f/41.84f;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
......@@ -1065,38 +1208,18 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
// force
(void) fprintf( amoebaGpu->log,"RealSpaceEwaldF [%16.9e %16.9e %16.9e] ",
amoebaGpu->psForce->_pSysStream[0][indexOffset],
amoebaGpu->psForce->_pSysStream[0][indexOffset+1],
amoebaGpu->psForce->_pSysStream[0][indexOffset+2] );
(void) fprintf( amoebaGpu->log,"PmeDirectElectrostaticF [%16.9e %16.9e %16.9e] ",
conversion*amoebaGpu->psForce->_pSysStream[0][indexOffset],
conversion*amoebaGpu->psForce->_pSysStream[0][indexOffset+1],
conversion*amoebaGpu->psForce->_pSysStream[0][indexOffset+2] );
// torque
(void) fprintf( amoebaGpu->log,"RealSpaceEwaldT [%16.9e %16.9e %16.9e] ",
amoebaGpu->psTorque->_pSysStream[0][indexOffset],
amoebaGpu->psTorque->_pSysStream[0][indexOffset+1],
amoebaGpu->psTorque->_pSysStream[0][indexOffset+2] );
// coords
#if 0
(void) fprintf( amoebaGpu->log,"x[%16.9e %16.9e %16.9e] ",
gpu->psPosq4->_pSysStream[0][ii].x,
gpu->psPosq4->_pSysStream[0][ii].y,
gpu->psPosq4->_pSysStream[0][ii].z);
for( int jj = 0; jj < gpu->natoms && jj < 5; jj++ ){
int debugIndex = jj*gpu->natoms + ii;
float xx = gpu->psPosq4->_pSysStream[0][jj].x - gpu->psPosq4->_pSysStream[0][ii].x;
float yy = gpu->psPosq4->_pSysStream[0][jj].y - gpu->psPosq4->_pSysStream[0][ii].y;
float zz = gpu->psPosq4->_pSysStream[0][jj].z - gpu->psPosq4->_pSysStream[0][ii].z;
(void) fprintf( amoebaGpu->log,"\n%4d %4d delta [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e] ",
ii, jj, xx, yy, zz,
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y, debugArray->_pSysStream[0][debugIndex].z );
(void) fprintf( amoebaGpu->log,"PmeDirectElectrostaticT [%16.9e %16.9e %16.9e] ",
conversion*amoebaGpu->psTorque->_pSysStream[0][indexOffset],
conversion*amoebaGpu->psTorque->_pSysStream[0][indexOffset+1],
conversion*amoebaGpu->psTorque->_pSysStream[0][indexOffset+2] );
}
#endif
(void) fprintf( amoebaGpu->log,"\n" );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
ii = gpu->natoms - maxPrint;
......@@ -1107,7 +1230,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
for( int kk = 0; kk < 5; kk++ ){
for( int kk = 0; kk < 15; kk++ ){
(void) fprintf( amoebaGpu->log,"%5d %5d [%16.9e %16.9e %16.9e %16.9e] E11\n", targetAtom, jj,
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
......@@ -1118,26 +1241,6 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
}
(void) fflush( amoebaGpu->log );
if( 0 ){
(void) fprintf( amoebaGpu->log, "%s Tiled F & T\n", methodName ); fflush( amoebaGpu->log );
int maxPrint = 12;
for( int ii = 0; ii < gpu->natoms; ii++ ){
// print cpu & gpu reductions
int offset = 3*ii;
(void) fprintf( amoebaGpu->log,"%6d F[%16.7e %16.7e %16.7e] T[%16.7e %16.7e %16.7e]\n", ii,
amoebaGpu->psForce->_pSysStream[0][offset],
amoebaGpu->psForce->_pSysStream[0][offset+1],
amoebaGpu->psForce->_pSysStream[0][offset+2],
amoebaGpu->psTorque->_pSysStream[0][offset],
amoebaGpu->psTorque->_pSysStream[0][offset+1],
amoebaGpu->psTorque->_pSysStream[0][offset+2] );
if( (ii == maxPrint) && (ii < (gpu->natoms - maxPrint)) )ii = gpu->natoms - maxPrint;
}
}
if( 1 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
......@@ -1145,7 +1248,7 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector);
cudaWriteVectorOfDoubleVectorsToFile( "CudaForceTorque", fileId, outputVector );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeDirectForceTorque", fileId, outputVector );
}
}
......@@ -1155,3 +1258,19 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
}
/**---------------------------------------------------------------------------------------
Compute Amoeba electrostatic force & torque using PME
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{
cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu );
kCalculateAmoebaPME( amoebaGpu );
}
......@@ -34,15 +34,8 @@ __launch_bounds__(128, 1)
#else
__launch_bounds__(64, 1)
#endif
void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
unsigned int* workUnit,
float4* atomCoord,
float* labFrameDipole,
float* labFrameQuadrupole,
float* inducedDipole,
float* inducedDipolePolar,
float* outputForce,
float* outputTorque
void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
unsigned int* workUnit, float* outputForce, float* outputTorque
#ifdef AMOEBA_DEBUG
, float4* debugArray, unsigned int targetAtom
......@@ -50,10 +43,11 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
){
#ifdef AMOEBA_DEBUG
int pullIndexMax = 12;
float4 pullBack[20];
#endif
extern __shared__ RealSpaceEwaldParticle sA[];
extern __shared__ PmeDirectElectrostaticParticle sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
......@@ -80,12 +74,10 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
RealSpaceEwaldParticle* psA = &sA[tbx];
PmeDirectElectrostaticParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
RealSpaceEwaldParticle localParticle;
loadRealSpaceEwaldShared(&localParticle, atomI,
atomCoord, labFrameDipole, labFrameQuadrupole,
inducedDipole, inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );
PmeDirectElectrostaticParticle localParticle;
loadPmeDirectElectrostaticShared(&localParticle, atomI );
localParticle.force[0] = 0.0f;
localParticle.force[1] = 0.0f;
......@@ -105,9 +97,7 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
// load shared data
loadRealSpaceEwaldShared( &(sA[threadIdx.x]), atomI,
atomCoord, labFrameDipole, labFrameQuadrupole,
inducedDipole, inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );
loadPmeDirectElectrostaticShared( &(sA[threadIdx.x]), atomI );
if (!bExclusionFlag)
{
......@@ -122,9 +112,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
float torque[2][3];
float energy;
calculateRealSpaceEwaldPairIxn_kernel( localParticle, psA[j],
cAmoebaSim.scalingDistanceCutoff, scalingFactors,
force, torque, &energy
calculatePmeDirectElectrostaticPairIxn_kernel( localParticle, psA[j],
scalingFactors, force, torque, &energy
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -171,9 +160,8 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
// force
float energy;
calculateRealSpaceEwaldPairIxn_kernel( localParticle, psA[j],
cAmoebaSim.scalingDistanceCutoff, scalingFactors,
force, torque, &energy
calculatePmeDirectElectrostaticPairIxn_kernel( localParticle, psA[j],
scalingFactors, force, torque, &energy
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -199,58 +187,54 @@ void METHOD_NAME(kCalculateAmoebaCudaRealSpaceEwald, Forces_kernel)(
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom ){
unsigned int index = (atomI == targetAtom) ? atomJ : atomI;
float blockId = 1.0f;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) atomJ;
debugArray[index].z = 1.0f;
debugArray[index].w = (float) y;
/*
debugArray[index].z = (float) y;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? force[0] : 0.0f;
debugArray[index].y = mask ? force[1] : 0.0f;
debugArray[index].z = mask ? force[2] : 0.0f;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[0][0] : 0.0f;
debugArray[index].y = mask ? torque[0][1] : 0.0f;
debugArray[index].z = mask ? torque[0][2] : 0.0f;
*/
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
int pullIndex = 0;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
debugArray[index].x = mask ? torque[0][0] : 0.0f;
debugArray[index].y = mask ? torque[0][1] : 0.0f;
debugArray[index].z = mask ? torque[0][2] : 0.0f;
debugArray[index].w = blockId;
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
}
}
#endif
}
}
// include self energy and self torque
if( atomI < cAmoebaSim.numberOfAtoms ){
calculatePmeSelfTorqueElectrostaticPairIxn_kernel( localParticle );
//float energy;
//calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy );
//totalEnergy += energy;
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
......@@ -300,9 +284,7 @@ if( atomI == targetAtom ){
{
// load shared data
loadRealSpaceEwaldShared( &(sA[threadIdx.x]), (y+tgx),
atomCoord, labFrameDipole, labFrameQuadrupole,
inducedDipole, inducedDipolePolar, cAmoebaSim.pDampingFactorAndThole );
loadPmeDirectElectrostaticShared( &(sA[threadIdx.x]), (y+tgx) );
}
......@@ -325,9 +307,8 @@ if( atomI == targetAtom ){
unsigned int atomJ = y + tj;
float energy;
calculateRealSpaceEwaldPairIxn_kernel( localParticle, psA[tj],
cAmoebaSim.scalingDistanceCutoff, scalingFactors,
force, torque, &energy
calculatePmeDirectElectrostaticPairIxn_kernel( localParticle, psA[tj],
scalingFactors, force, torque, &energy
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -360,60 +341,44 @@ if( atomI == targetAtom ){
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || atomJ == targetAtom ){
unsigned int index = (atomI == targetAtom) ? atomJ : atomI;
unsigned int indexI = (atomI == targetAtom) ? 0 : 1;
unsigned int indexJ = (atomI == targetAtom) ? 1 : 0;
float forceSign = (atomI == targetAtom) ? 1.0f : -1.0f;
float blockId = 2.0f;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) atomJ;
debugArray[index].z = 2.0f;
debugArray[index].w = (float) y;
/*
debugArray[index].z = (float) y;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? forceSign*force[0] : 0.0f;
debugArray[index].y = mask ? forceSign*force[1] : 0.0f;
debugArray[index].z = mask ? forceSign*force[2] : 0.0f;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexI][0] : 0.0f;
debugArray[index].y = mask ? torque[indexI][1] : 0.0f;
debugArray[index].z = mask ? torque[indexI][2] : 0.0f;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexJ][0] : 0.0f;
debugArray[index].y = mask ? torque[indexJ][1] : 0.0f;
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
*/
index += cAmoebaSim.paddedNumberOfAtoms;
int pullIndex = 0;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
debugArray[index].w = blockId;
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
}
}
#endif
......@@ -449,9 +414,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// force
float energy;
calculateRealSpaceEwaldPairIxn_kernel( localParticle, psA[tj],
cAmoebaSim.scalingDistanceCutoff, scalingFactors,
force, torque, &energy
calculatePmeDirectElectrostaticPairIxn_kernel( localParticle, psA[tj],
scalingFactors, force, torque, &energy
#ifdef AMOEBA_DEBUG
, pullBack
#endif
......@@ -490,55 +454,39 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned int indexI = (atomI == targetAtom) ? 0 : 1;
unsigned int indexJ = (atomI == targetAtom) ? 1 : 0;
float forceSign = (atomI == targetAtom) ? 1.0f : -1.0f;
float blockId = 3.0f;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) atomJ;
debugArray[index].z = 3.0f;
debugArray[index].w = (float) y;
/*
debugArray[index].z = (float) y;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? forceSign*force[0] : 0.0f;
debugArray[index].y = mask ? forceSign*force[1] : 0.0f;
debugArray[index].z = mask ? forceSign*force[2] : 0.0f;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexI][0] : 0.0f;
debugArray[index].y = mask ? torque[indexI][1] : 0.0f;
debugArray[index].z = mask ? torque[indexI][2] : 0.0f;
debugArray[index].w = blockId;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = mask ? torque[indexJ][0] : 0.0f;
debugArray[index].y = mask ? torque[indexJ][1] : 0.0f;
debugArray[index].z = mask ? torque[indexJ][2] : 0.0f;
*/
index += cAmoebaSim.paddedNumberOfAtoms;
int pullIndex = 0;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
debugArray[index].w = blockId;
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
pullIndex++;
debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
}
#if 0
index += cAmoebaSim.paddedNumberOfAtoms;
......@@ -550,13 +498,6 @@ if( atomI == targetAtom || atomJ == targetAtom ){
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = scalingFactors[MScaleIndex];
debugArray[index].y = mScaleVal;
for( int pIndex = 0; pIndex < 14; pIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = pullBack[pIndex].x;
debugArray[index].y = pullBack[pIndex].y;
debugArray[index].z = pullBack[pIndex].z;
debugArray[index].w = pullBack[pIndex].w;
}
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = labFrameDipole[3*atomI];
......
......@@ -322,7 +322,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
--------------------------------------------------------------------------------------- */
void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
{
// ---------------------------------------------------------------------------------------
......@@ -487,3 +487,9 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
}
}
void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{
cudaComputeAmoebaPmeDirectFixedEField( amoebaGpu );
kCalculateAmoebaPMEFixedMultipoleField( amoebaGpu );
}
......@@ -616,6 +616,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpu, amoebaGpu->psWorkVector[0], amoebaGpu->psWorkVector[1] );
kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n");
......
......@@ -390,7 +390,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
if( amoebaGpu->multipoleNonbondedMethod == AMOEBA_NO_CUTOFF ){
cudaComputeAmoebaElectrostatic( amoebaGpu );
} else {
cudaComputeAmoebaRealSpaceEwald( amoebaGpu );
cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
}
// map torques to forces
......
......@@ -2008,6 +2008,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
multipoleForce->setNonbondedMethod( AmoebaMultipoleForce::NoCutoff );
}
multipoleForce->setCutoffDistance( cutoffDistance );
multipoleForce->setAEwald( aewald );
system.setDefaultPeriodicBoxVectors( Vec3(box[0], 0.0, 0.0), Vec3(0.0, box[1], 0.0), Vec3(0.0, 0.0, box[2]) );
if( log ){
(void) fprintf( log, "%s number of MultipoleParameter terms=%d usePme=%d aewald=%15.7e cutoffDistance=%12.4f\n",
......@@ -2111,6 +2112,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
double polarityConversion = AngstromToNm*AngstromToNm*AngstromToNm;
double dampingFactorConversion = sqrt( AngstromToNm );
multipoleForce->setAEwald( multipoleForce->getAEwald()/AngstromToNm );
multipoleForce->setCutoffDistance( multipoleForce->getCutoffDistance()*AngstromToNm );
multipoleForce->setScalingDistanceCutoff( multipoleForce->getScalingDistanceCutoff()*AngstromToNm );
......@@ -2165,8 +2167,8 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
(useOpenMMUnits ? "OpenMM" : "Amoeba") );
std::string nonbondedMethod = multipoleForce->getNonbondedMethod( ) == AmoebaMultipoleForce::PME ? "PME" : "NoCutoff";
(void) fprintf( log, "NonbondedMethod=%s cutoff=%15.7e.\n", nonbondedMethod.c_str(),
multipoleForce->getCutoffDistance() );
(void) fprintf( log, "NonbondedMethod=%s aEwald=%15.7e cutoff=%15.7e.\n", nonbondedMethod.c_str(),
multipoleForce->getAEwald(), multipoleForce->getCutoffDistance() );
Vec3 a,b,c;
system.getDefaultPeriodicBoxVectors( a, b, c );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment