Commit 51828eaa authored by Peter Eastman's avatar Peter Eastman
Browse files

Merge branch 'master' into qc

parents cf8a03e8 5ed9dd65
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
#--------------------------------------------------- #---------------------------------------------------
# OpenMM CUDA Amoeba Implementation # OpenMM CUDA Amoeba Implementation
# #
# Creates OpenMM library, base name=OpenMMAmoebaCUDA. # Creates OpenMMAmoebaCUDA library.
# Default libraries are shared & optimized. Variants
# are created for debug (_d).
# #
# Windows: # Windows:
# OpenMMAmoebaCUDA[_d].dll # OpenMMAmoebaCUDA.dll
# OpenMMAmoebaCUDA[_d].lib # OpenMMAmoebaCUDA.lib
# Unix: # Unix:
# libOpenMMAmoebaCUDA[_d].so # libOpenMMAmoebaCUDA.so
#---------------------------------------------------- #----------------------------------------------------
# The source is organized into subdirectories, but we handle them all from # The source is organized into subdirectories, but we handle them all from
...@@ -25,18 +23,6 @@ SET(OPENMMAMOEBACUDA_LIBRARY_NAME OpenMMAmoebaCUDA) ...@@ -25,18 +23,6 @@ SET(OPENMMAMOEBACUDA_LIBRARY_NAME OpenMMAmoebaCUDA)
SET(SHARED_TARGET ${OPENMMAMOEBACUDA_LIBRARY_NAME}) SET(SHARED_TARGET ${OPENMMAMOEBACUDA_LIBRARY_NAME})
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (MSVC)
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (MSVC)
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are # These are all the places to search for header files which are
# to be part of the API. # to be part of the API.
SET(API_INCLUDE_DIRS) # start empty SET(API_INCLUDE_DIRS) # start empty
...@@ -101,14 +87,9 @@ ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} ...@@ -101,14 +87,9 @@ ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE) SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${CUDA_LIBRARIES} ${PTHREADS_LIB})
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME}CUDA)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${SHARED_AMOEBA_TARGET})
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${CUDA_LIBRARIES} ${PTHREADS_LIB})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${OPENMM_LIBRARY_NAME}CUDA_d optimized ${OPENMM_LIBRARY_NAME}CUDA)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${SHARED_AMOEBA_TARGET} optimized ${SHARED_AMOEBA_TARGET})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_SHARED_LIBRARY") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_SHARED_LIBRARY")
IF (APPLE) IF (APPLE)
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework CUDA") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework CUDA")
...@@ -119,11 +100,7 @@ ENDIF (APPLE) ...@@ -119,11 +100,7 @@ ENDIF (APPLE)
INSTALL(TARGETS ${SHARED_TARGET} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/plugins) INSTALL(TARGETS ${SHARED_TARGET} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/plugins)
# Ensure that links to the main CUDA library will be resolved. # Ensure that links to the main CUDA library will be resolved.
IF (APPLE) IF (APPLE)
IF (CMAKE_BUILD_TYPE MATCHES Debug) SET(CUDA_LIBRARY libOpenMMCUDA.dylib)
SET(CUDA_LIBRARY libOpenMMCUDA_d.dylib)
ELSE (CMAKE_BUILD_TYPE MATCHES Debug)
SET(CUDA_LIBRARY libOpenMMCUDA.dylib)
ENDIF (CMAKE_BUILD_TYPE MATCHES Debug)
INSTALL(CODE "EXECUTE_PROCESS(COMMAND install_name_tool -change ${CUDA_LIBRARY} @loader_path/${CUDA_LIBRARY} ${CMAKE_INSTALL_PREFIX}/lib/plugins/lib${SHARED_TARGET}.dylib)") INSTALL(CODE "EXECUTE_PROCESS(COMMAND install_name_tool -change ${CUDA_LIBRARY} @loader_path/${CUDA_LIBRARY} ${CMAKE_INSTALL_PREFIX}/lib/plugins/lib${SHARED_TARGET}.dylib)")
ENDIF (APPLE) ENDIF (APPLE)
if(OPENMM_BUILD_CUDA_TESTS) if(OPENMM_BUILD_CUDA_TESTS)
......
...@@ -921,6 +921,10 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -921,6 +921,10 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
molecularQuadrupolesVec.push_back((float) quadrupole[4]); molecularQuadrupolesVec.push_back((float) quadrupole[4]);
molecularQuadrupolesVec.push_back((float) quadrupole[5]); molecularQuadrupolesVec.push_back((float) quadrupole[5]);
} }
hasQuadrupoles = false;
for (int i = 0; i < (int) molecularQuadrupolesVec.size(); i++)
if (molecularQuadrupolesVec[i] != 0.0)
hasQuadrupoles = true;
int paddedNumAtoms = cu.getPaddedNumAtoms(); int paddedNumAtoms = cu.getPaddedNumAtoms();
for (int i = numMultipoles; i < paddedNumAtoms; i++) { for (int i = numMultipoles; i < paddedNumAtoms; i++) {
dampingAndTholeVec.push_back(make_float2(0, 0)); dampingAndTholeVec.push_back(make_float2(0, 0));
...@@ -1040,6 +1044,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1040,6 +1044,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["DIRECT_POLARIZATION"] = ""; defines["DIRECT_POLARIZATION"] = "";
if (useShuffle) if (useShuffle)
defines["USE_SHUFFLE"] = ""; defines["USE_SHUFFLE"] = "";
if (hasQuadrupoles)
defines["INCLUDE_QUADRUPOLES"] = "";
defines["TILE_SIZE"] = cu.intToString(CudaContext::TileSize); defines["TILE_SIZE"] = cu.intToString(CudaContext::TileSize);
int numExclusionTiles = tilesWithExclusions.size(); int numExclusionTiles = tilesWithExclusions.size();
defines["NUM_TILES_WITH_EXCLUSIONS"] = cu.intToString(numExclusionTiles); defines["NUM_TILES_WITH_EXCLUSIONS"] = cu.intToString(numExclusionTiles);
...@@ -1104,9 +1110,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1104,9 +1110,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
if (usePME) { if (usePME) {
electrostaticsSource << CudaKernelSources::vectorOps; electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::pmeMultipoleElectrostatics; electrostaticsSource << CudaAmoebaKernelSources::pmeMultipoleElectrostatics;
electrostaticsSource << CudaAmoebaKernelSources::pmeElectrostaticPairForce; electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::pmeElectrostaticPairForce : CudaAmoebaKernelSources::pmeElectrostaticPairForceNoQuadrupoles);
electrostaticsSource << "#define APPLY_SCALE\n"; electrostaticsSource << "#define APPLY_SCALE\n";
electrostaticsSource << CudaAmoebaKernelSources::pmeElectrostaticPairForce; electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::pmeElectrostaticPairForce : CudaAmoebaKernelSources::pmeElectrostaticPairForceNoQuadrupoles);
electrostaticsThreadMemory = 24*elementSize+3*sizeof(float)+3*sizeof(int)/(double) cu.TileSize; electrostaticsThreadMemory = 24*elementSize+3*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
if (!useShuffle) if (!useShuffle)
electrostaticsThreadMemory += 3*elementSize; electrostaticsThreadMemory += 3*elementSize;
...@@ -1115,13 +1121,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1115,13 +1121,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
electrostaticsSource << CudaKernelSources::vectorOps; electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics; electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics;
electrostaticsSource << "#define F1\n"; electrostaticsSource << "#define F1\n";
electrostaticsSource << CudaAmoebaKernelSources::electrostaticPairForce; electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::electrostaticPairForce : CudaAmoebaKernelSources::electrostaticPairForceNoQuadrupoles);
electrostaticsSource << "#undef F1\n"; electrostaticsSource << "#undef F1\n";
electrostaticsSource << "#define T1\n"; electrostaticsSource << "#define T1\n";
electrostaticsSource << CudaAmoebaKernelSources::electrostaticPairForce; electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::electrostaticPairForce : CudaAmoebaKernelSources::electrostaticPairForceNoQuadrupoles);
electrostaticsSource << "#undef T1\n"; electrostaticsSource << "#undef T1\n";
electrostaticsSource << "#define T3\n"; electrostaticsSource << "#define T3\n";
electrostaticsSource << CudaAmoebaKernelSources::electrostaticPairForce; electrostaticsSource << (hasQuadrupoles ? CudaAmoebaKernelSources::electrostaticPairForce : CudaAmoebaKernelSources::electrostaticPairForceNoQuadrupoles);
electrostaticsThreadMemory = 21*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize; electrostaticsThreadMemory = 21*elementSize+2*sizeof(float)+3*sizeof(int)/(double) cu.TileSize;
if (!useShuffle) if (!useShuffle)
electrostaticsThreadMemory += 3*elementSize; electrostaticsThreadMemory += 3*elementSize;
...@@ -1851,6 +1857,11 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co ...@@ -1851,6 +1857,11 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
molecularQuadrupolesVec.push_back((float) quadrupole[4]); molecularQuadrupolesVec.push_back((float) quadrupole[4]);
molecularQuadrupolesVec.push_back((float) quadrupole[5]); molecularQuadrupolesVec.push_back((float) quadrupole[5]);
} }
if (!hasQuadrupoles) {
for (int i = 0; i < (int) molecularQuadrupolesVec.size(); i++)
if (molecularQuadrupolesVec[i] != 0.0)
throw OpenMMException("updateParametersInContext: Cannot set a non-zero quadrupole moment, because quadrupoles were excluded from the kernel");
}
for (int i = force.getNumMultipoles(); i < cu.getPaddedNumAtoms(); i++) { for (int i = force.getNumMultipoles(); i < cu.getPaddedNumAtoms(); i++) {
dampingAndTholeVec.push_back(make_float2(0, 0)); dampingAndTholeVec.push_back(make_float2(0, 0));
polarizabilityVec.push_back(0); polarizabilityVec.push_back(0);
......
...@@ -380,7 +380,7 @@ private: ...@@ -380,7 +380,7 @@ private:
int numMultipoles, maxInducedIterations; int numMultipoles, maxInducedIterations;
int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads; int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
double inducedEpsilon; double inducedEpsilon;
bool hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid; bool hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
std::vector<int3> covalentFlagValues; std::vector<int3> covalentFlagValues;
......
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real& energy, real3& outputForce) {
#elif defined T1
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#else
__device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#endif
#ifdef F1
const float uScale = 1;
real ddsc3_0 = 0;
real ddsc3_1 = 0;
real ddsc3_2 = 0;
real ddsc5_0 = 0;
real ddsc5_1 = 0;
real ddsc5_2 = 0;
real ddsc7_0 = 0;
real ddsc7_1 = 0;
real ddsc7_2 = 0;
#endif
real xr = atom2.posq.x - atom1.posq.x;
real yr = atom2.posq.y - atom1.posq.y;
real zr = atom2.posq.z - atom1.posq.z;
real r2 = xr*xr + yr*yr + zr*zr;
real r = SQRT(r2);
real rr1 = RECIP(r);
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real rr5 = 3*rr3*rr2;
real rr7 = 5*rr5*rr2;
real rr9 = 7*rr7*rr2;
#ifdef F1
real rr11 = 9*rr9*rr2;
#endif
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
real pdamp = atom1.damp*atom2.damp;
if (pdamp != 0) {
real ratio = r/pdamp;
float pGamma = atom2.thole > atom1.thole ? atom1.thole : atom2.thole;
real damp = ratio*ratio*ratio*pGamma;
real dampExp = EXP(-damp);
real damp1 = damp + 1;
real damp2 = damp*damp;
scale3 = 1 - dampExp;
scale5 = 1 - damp1*dampExp;
scale7 = 1 - (damp1 + 0.6f*damp2)*dampExp;
#ifdef F1
real factor = 3*damp*dampExp*rr2;
real factor7 = -0.2f + 0.6f*damp;
ddsc3_0 = factor*xr;
ddsc5_0 = ddsc3_0*damp;
ddsc7_0 = ddsc5_0*factor7;
ddsc3_1 = factor*yr;
ddsc5_1 = ddsc3_1*damp;
ddsc7_1 = ddsc5_1*factor7;
ddsc3_2 = factor*zr;
ddsc5_2 = ddsc3_2*damp;
ddsc7_2 = ddsc5_2*factor7;
#endif
}
#if defined F1
real scale3i = rr3*scale3*uScale;
real scale5i = rr5*scale5*uScale;
#endif
real dsc3 = rr3*scale3*dScale;
real psc3 = rr3*scale3*pScale;
real dsc5 = rr5*scale5*dScale;
real psc5 = rr5*scale5*pScale;
real dsc7 = rr7*scale7*dScale;
real psc7 = rr7*scale7*pScale;
#if defined F1
real sc2 = atom1.dipole.x*atom2.dipole.x + atom1.dipole.y*atom2.dipole.y + atom1.dipole.z*atom2.dipole.z;
#endif
#if defined F1 || defined T1
real sc4 = atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr;
#endif
#if defined F1 || defined T3
real sc3 = atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr;
#endif
#if defined F1
real sci1 = atom1.inducedDipole.x*atom2.dipole.x + atom1.inducedDipole.y*atom2.dipole.y + atom1.inducedDipole.z*atom2.dipole.z +
atom2.inducedDipole.x*atom1.dipole.x + atom2.inducedDipole.y*atom1.dipole.y + atom2.inducedDipole.z*atom1.dipole.z;
#endif
#if defined F1 || defined T3
real sci3 = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
#endif
#if defined F1 || defined T1
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
#endif
#if defined F1
real scip1 = atom1.inducedDipolePolar.x*atom2.dipole.x + atom1.inducedDipolePolar.y*atom2.dipole.y + atom1.inducedDipolePolar.z*atom2.dipole.z +
atom2.inducedDipolePolar.x*atom1.dipole.x + atom2.inducedDipolePolar.y*atom1.dipole.y + atom2.inducedDipolePolar.z*atom1.dipole.z;
real scip2 = atom1.inducedDipole.x*atom2.inducedDipolePolar.x + atom1.inducedDipole.y*atom2.inducedDipolePolar.y + atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom2.inducedDipole.x*atom1.inducedDipolePolar.x + atom2.inducedDipole.y*atom1.inducedDipolePolar.y + atom2.inducedDipole.z*atom1.inducedDipolePolar.z;
#endif
#if defined F1 || defined T3
real scip3 = ((atom1.inducedDipolePolar.x)*(xr) + (atom1.inducedDipolePolar.y)*(yr) + (atom1.inducedDipolePolar.z)*(zr));
#endif
#if defined F1 || defined T1
real scip4 = ((atom2.inducedDipolePolar.x)*(xr) + (atom2.inducedDipolePolar.y)*(yr) + (atom2.inducedDipolePolar.z)*(zr));
#endif
#ifdef F1
real gli1 = atom2.posq.w*sci3 - atom1.posq.w*sci4;
real gli6 = sci1;
real glip1 = atom2.posq.w*scip3 - atom1.posq.w*scip4;
real glip6 = scip1;
real gli2 = -sc3*sci4 - sci3*sc4;
real glip2 = -sc3*scip4 - scip3*sc4;
real factor3 = rr3*((gli1 + gli6)*pScale + (glip1 + glip6)*dScale);
real factor5 = rr5*(gli2*pScale + glip2*dScale);
real ftm2i_0 = -0.5f*(factor3*ddsc3_0 + factor5*ddsc5_0);
real ftm2i_1 = -0.5f*(factor3*ddsc3_1 + factor5*ddsc5_1);
real ftm2i_2 = -0.5f*(factor3*ddsc3_2 + factor5*ddsc5_2);
real gl0 = atom1.posq.w*atom2.posq.w;
real gl1 = atom2.posq.w*sc3 - atom1.posq.w*sc4;
real gl2 = -sc3*sc4;
real gl6 = sc2;
real gf1 = rr3*gl0 + rr5*(gl1+gl6) + rr7*gl2;
#endif
#if defined F1 || defined T1
real gf2 = -atom2.posq.w*rr3 + sc4*rr5;
real gf5 = 2*(-atom2.posq.w*rr5+sc4*rr7);
#endif
#if defined F1 || defined T3
real gf3 = atom1.posq.w*rr3 + sc3*rr5;
real gf6 = 2*(-atom1.posq.w*rr5-sc3*rr7);
#endif
#ifdef F1
real em = mScale*(rr1*gl0 + rr3*(gl1+gl6) + rr5*gl2);
real ei = 0.5f*((gli1+gli6)*psc3 + gli2*psc5);
energy = em+ei;
#endif
#ifdef F1
real ftm2_0 = mScale*(gf1*xr + gf2*atom1.dipole.x + gf3*atom2.dipole.x);
real ftm2_1 = mScale*(gf1*yr + gf2*atom1.dipole.y + gf3*atom2.dipole.y);
real ftm2_2 = mScale*(gf1*zr + gf2*atom1.dipole.z + gf3*atom2.dipole.z);
real gfi1 = rr2*(1.5f*((gli1+gli6)*psc3 + (glip1+glip6)*dsc3 + scip2*scale3i) + 2.5f*(gli2*psc5 + glip2*dsc5 - (sci3*scip4+scip3*sci4)*scale5i));
ftm2i_0 += gfi1*xr;
ftm2i_1 += gfi1*yr;
ftm2i_2 += gfi1*zr;
#endif
#if defined F1 || defined T1
real gfi5 = (sci4*psc7 + scip4*dsc7);
#endif
#if defined F1 || defined T3
real gfi6 = -(sci3*psc7 + scip3*dsc7);
#endif
#ifdef F1
ftm2i_0 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.x*psc3 + atom1.inducedDipolePolar.x*dsc3) +
sc4*(atom1.inducedDipole.x*psc5 + atom1.inducedDipolePolar.x*dsc5)) +
0.5f*(atom1.posq.w*(atom2.inducedDipole.x*psc3+atom2.inducedDipolePolar.x*dsc3) +
sc3*(atom2.inducedDipole.x*psc5 +atom2.inducedDipolePolar.x*dsc5)) +
scale5i*(sci4*atom1.inducedDipolePolar.x+scip4*atom1.inducedDipole.x +
sci3*atom2.inducedDipolePolar.x+scip3*atom2.inducedDipole.x)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.x +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.x;
ftm2i_1 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.y*psc3 + atom1.inducedDipolePolar.y*dsc3) +
sc4*(atom1.inducedDipole.y*psc5 + atom1.inducedDipolePolar.y*dsc5)) +
(atom1.posq.w*(atom2.inducedDipole.y*psc3+atom2.inducedDipolePolar.y*dsc3) +
sc3*(atom2.inducedDipole.y*psc5+atom2.inducedDipolePolar.y*dsc5))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.y+scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y+scip3*atom2.inducedDipole.y)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.y +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.y;
ftm2i_2 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.z*psc3 + atom1.inducedDipolePolar.z*dsc3) +
sc4*(atom1.inducedDipole.z*psc5 + atom1.inducedDipolePolar.z*dsc5)) +
(atom1.posq.w*(atom2.inducedDipole.z*psc3+atom2.inducedDipolePolar.z*dsc3) +
sc3*(atom2.inducedDipole.z*psc5+atom2.inducedDipolePolar.z*dsc5))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.z+scip4*atom1.inducedDipole.z +
sci3*atom2.inducedDipolePolar.z+scip3*atom2.inducedDipole.z)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.z +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.z;
#ifdef DIRECT_POLARIZATION
real gfd = 0.5*(3*rr2*scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real temp5 = 0.5*scale5i;
real fdir_0 = gfd*xr + temp5*(sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x + sci3*atom2.inducedDipolePolar.x + scip3*atom2.inducedDipole.x);
real fdir_1 = gfd*yr + temp5*(sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y + scip3*atom2.inducedDipole.y);
real fdir_2 = gfd*zr + temp5*(sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z + sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
ftm2i_0 -= fdir_0;
ftm2i_1 -= fdir_1;
ftm2i_2 -= fdir_2;
#else
real scaleF = 0.5f*uScale;
real inducedFactor3 = scip2*rr3*scaleF;
real inducedFactor5 = (sci3*scip4+scip3*sci4)*rr5*scaleF;
real findmp_0 = inducedFactor3*ddsc3_0 - inducedFactor5*ddsc5_0;
real findmp_1 = inducedFactor3*ddsc3_1 - inducedFactor5*ddsc5_1;
real findmp_2 = inducedFactor3*ddsc3_2 - inducedFactor5*ddsc5_2;
ftm2i_0 -= findmp_0;
ftm2i_1 -= findmp_1;
ftm2i_2 -= findmp_2;
#endif
#endif
#if defined T1
real gti2 = 0.5f*(sci4*psc5+scip4*dsc5);
real gti5 = gfi5;
#endif
#if defined T3
real gti3 = 0.5f*(sci3*psc5+scip3*dsc5);
real gti6 = gfi6;
#endif
#if defined T1 || defined T3
real dixdk_0 = atom1.dipole.y*atom2.dipole.z - atom1.dipole.z*atom2.dipole.y;
real dixdk_1 = atom1.dipole.z*atom2.dipole.x - atom1.dipole.x*atom2.dipole.z;
real dixdk_2 = atom1.dipole.x*atom2.dipole.y - atom1.dipole.y*atom2.dipole.x;
#if defined T1
real dixuk_0 = atom1.dipole.y*atom2.inducedDipole.z - atom1.dipole.z*atom2.inducedDipole.y;
real dixuk_1 = atom1.dipole.z*atom2.inducedDipole.x - atom1.dipole.x*atom2.inducedDipole.z;
real dixuk_2 = atom1.dipole.x*atom2.inducedDipole.y - atom1.dipole.y*atom2.inducedDipole.x;
#endif
#endif
#ifdef T1
real dixukp_0 = atom1.dipole.y*atom2.inducedDipolePolar.z - atom1.dipole.z*atom2.inducedDipolePolar.y;
real dixukp_1 = atom1.dipole.z*atom2.inducedDipolePolar.x - atom1.dipole.x*atom2.inducedDipolePolar.z;
real dixukp_2 = atom1.dipole.x*atom2.inducedDipolePolar.y - atom1.dipole.y*atom2.inducedDipolePolar.x;
#endif
#ifdef T1
real dixr_0 = atom1.dipole.y*zr - atom1.dipole.z*yr;
real dixr_1 = atom1.dipole.z*xr - atom1.dipole.x*zr;
real dixr_2 = atom1.dipole.x*yr - atom1.dipole.y*xr;
#endif
#ifdef T1
real ttm2_0 = -rr3*dixdk_0 + gf2*dixr_0;
real ttm2_1 = -rr3*dixdk_1 + gf2*dixr_1;
real ttm2_2 = -rr3*dixdk_2 + gf2*dixr_2;
real ttm2i_0 = -(dixuk_0*psc3+dixukp_0*dsc3)*0.5f + gti2*dixr_0;
real ttm2i_1 = -(dixuk_1*psc3+dixukp_1*dsc3)*0.5f + gti2*dixr_1;
real ttm2i_2 = -(dixuk_2*psc3+dixukp_2*dsc3)*0.5f + gti2*dixr_2;
#endif
#ifdef T3
real dkxr_0 = atom2.dipole.y*zr - atom2.dipole.z*yr;
real dkxr_1 = atom2.dipole.z*xr - atom2.dipole.x*zr;
real dkxr_2 = atom2.dipole.x*yr - atom2.dipole.y*xr;
real dkxui_0 = atom2.dipole.y*atom1.inducedDipole.z - atom2.dipole.z*atom1.inducedDipole.y;
real dkxui_1 = atom2.dipole.z*atom1.inducedDipole.x - atom2.dipole.x*atom1.inducedDipole.z;
real dkxui_2 = atom2.dipole.x*atom1.inducedDipole.y - atom2.dipole.y*atom1.inducedDipole.x;
real dkxuip_0 = atom2.dipole.y*atom1.inducedDipolePolar.z - atom2.dipole.z*atom1.inducedDipolePolar.y;
real dkxuip_1 = atom2.dipole.z*atom1.inducedDipolePolar.x - atom2.dipole.x*atom1.inducedDipolePolar.z;
real dkxuip_2 = atom2.dipole.x*atom1.inducedDipolePolar.y - atom2.dipole.y*atom1.inducedDipolePolar.x;
real ttm3_0 = rr3*dixdk_0 + gf3*dkxr_0;
real ttm3_1 = rr3*dixdk_1 + gf3*dkxr_1;
real ttm3_2 = rr3*dixdk_2 + gf3*dkxr_2;
real ttm3i_0 = -(dkxui_0*psc3+ dkxuip_0*dsc3)*0.5f + gti3*dkxr_0;
real ttm3i_1 = -(dkxui_1*psc3+ dkxuip_1*dsc3)*0.5f + gti3*dkxr_1;
real ttm3i_2 = -(dkxui_2*psc3+ dkxuip_2*dsc3)*0.5f + gti3*dkxr_2;
#endif
if (mScale < 1) {
#ifdef T1
ttm2_0 *= mScale;
ttm2_1 *= mScale;
ttm2_2 *= mScale;
#endif
#ifdef T3
ttm3_0 *= mScale;
ttm3_1 *= mScale;
ttm3_2 *= mScale;
#endif
}
#ifdef F1
outputForce.x = -(ftm2_0+ftm2i_0);
outputForce.y = -(ftm2_1+ftm2i_1);
outputForce.z = -(ftm2_2+ftm2i_2);
#endif
#ifdef T1
outputForce.x = (ttm2_0 + ttm2i_0);
outputForce.y = (ttm2_1 + ttm2i_1);
outputForce.z = (ttm2_2 + ttm2i_2);
#endif
#ifdef T3
outputForce.x = (ttm3_0 + ttm3i_0);
outputForce.y = (ttm3_1 + ttm3i_1);
outputForce.z = (ttm3_2 + ttm3i_2);
#endif
}
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
typedef struct { typedef struct {
real4 posq; real4 posq;
real3 force, dipole, inducedDipole, inducedDipolePolar; real3 force, dipole, inducedDipole, inducedDipolePolar;
#ifdef INCLUDE_QUADRUPOLES
real quadrupoleXX, quadrupoleXY, quadrupoleXZ; real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ; real quadrupoleYY, quadrupoleYZ;
#endif
float thole, damp; float thole, damp;
} AtomData; } AtomData;
...@@ -18,11 +20,13 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -18,11 +20,13 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.dipole.x = labFrameDipole[atom*3]; data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1]; data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2]; data.dipole.z = labFrameDipole[atom*3+2];
#ifdef INCLUDE_QUADRUPOLES
data.quadrupoleXX = labFrameQuadrupole[atom*5]; data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1]; data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2]; data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3]; data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4]; data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
#endif
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1]; data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2]; data.inducedDipole.z = inducedDipole[atom*3+2];
...@@ -92,11 +96,13 @@ extern "C" __global__ void computeElectrostatics( ...@@ -92,11 +96,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].posq = data.posq; localData[threadIdx.x].posq = data.posq;
localData[threadIdx.x].dipole = data.dipole; localData[threadIdx.x].dipole = data.dipole;
#ifdef INCLUDE_QUADRUPOLES
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX; localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY; localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ; localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY; localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ; localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
#endif
localData[threadIdx.x].inducedDipole = data.inducedDipole; localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar; localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; localData[threadIdx.x].thole = data.thole;
......
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
typedef struct { typedef struct {
real4 posq; real4 posq;
real3 field, fieldPolar, dipole; real3 field, fieldPolar, dipole;
#ifdef INCLUDE_QUADRUPOLES
real quadrupoleXX, quadrupoleXY, quadrupoleXZ; real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ, quadrupoleZZ; real quadrupoleYY, quadrupoleYZ, quadrupoleZZ;
#endif
float thole, damp; float thole, damp;
#ifdef USE_GK #ifdef USE_GK
real3 gkField; real3 gkField;
...@@ -17,12 +19,14 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -17,12 +19,14 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.dipole.x = labFrameDipole[atom*3]; data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1]; data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2]; data.dipole.z = labFrameDipole[atom*3+2];
#ifdef INCLUDE_QUADRUPOLES
data.quadrupoleXX = labFrameQuadrupole[atom*5]; data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1]; data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2]; data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3]; data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4]; data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY); data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
#endif
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
...@@ -85,15 +89,15 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -85,15 +89,15 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real prr7 = 15*(1-psc7)/r7; real prr7 = 15*(1-psc7)/r7;
real dir = dot(atom1.dipole, deltaR); real dir = dot(atom1.dipole, deltaR);
real dkr = dot(atom2.dipole, deltaR);
#ifdef INCLUDE_QUADRUPOLES
real3 qi; real3 qi;
qi.x = atom1.quadrupoleXX*deltaR.x + atom1.quadrupoleXY*deltaR.y + atom1.quadrupoleXZ*deltaR.z; qi.x = atom1.quadrupoleXX*deltaR.x + atom1.quadrupoleXY*deltaR.y + atom1.quadrupoleXZ*deltaR.z;
qi.y = atom1.quadrupoleXY*deltaR.x + atom1.quadrupoleYY*deltaR.y + atom1.quadrupoleYZ*deltaR.z; qi.y = atom1.quadrupoleXY*deltaR.x + atom1.quadrupoleYY*deltaR.y + atom1.quadrupoleYZ*deltaR.z;
qi.z = atom1.quadrupoleXZ*deltaR.x + atom1.quadrupoleYZ*deltaR.y + atom1.quadrupoleZZ*deltaR.z; qi.z = atom1.quadrupoleXZ*deltaR.x + atom1.quadrupoleYZ*deltaR.y + atom1.quadrupoleZZ*deltaR.z;
real qir = dot(qi, deltaR); real qir = dot(qi, deltaR);
real dkr = dot(atom2.dipole, deltaR);
real3 qk; real3 qk;
qk.x = atom2.quadrupoleXX*deltaR.x + atom2.quadrupoleXY*deltaR.y + atom2.quadrupoleXZ*deltaR.z; qk.x = atom2.quadrupoleXX*deltaR.x + atom2.quadrupoleXY*deltaR.y + atom2.quadrupoleXZ*deltaR.z;
qk.y = atom2.quadrupoleXY*deltaR.x + atom2.quadrupoleYY*deltaR.y + atom2.quadrupoleYZ*deltaR.z; qk.y = atom2.quadrupoleXY*deltaR.x + atom2.quadrupoleYY*deltaR.y + atom2.quadrupoleYZ*deltaR.z;
...@@ -106,7 +110,14 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -106,7 +110,14 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real3 fkd = deltaR*(drr3*atom1.posq.w+drr5*dir+drr7*qir) - drr3*atom1.dipole - 2*drr5*qi; real3 fkd = deltaR*(drr3*atom1.posq.w+drr5*dir+drr7*qir) - drr3*atom1.dipole - 2*drr5*qi;
real3 fip = -deltaR*(prr3*atom2.posq.w-prr5*dkr+prr7*qkr) - prr3*atom2.dipole + 2*prr5*qk; real3 fip = -deltaR*(prr3*atom2.posq.w-prr5*dkr+prr7*qkr) - prr3*atom2.dipole + 2*prr5*qk;
real3 fkp = deltaR*(prr3*atom1.posq.w+prr5*dir+prr7*qir) - prr3*atom1.dipole - 2*prr5*qi; real3 fkp = deltaR*(prr3*atom1.posq.w+prr5*dir+prr7*qir) - prr3*atom1.dipole - 2*prr5*qi;
#else
real3 fim = -deltaR*(bn1*atom2.posq.w-bn2*dkr) - bn1*atom2.dipole;
real3 fkm = deltaR*(bn1*atom1.posq.w+bn2*dir) - bn1*atom1.dipole;
real3 fid = -deltaR*(drr3*atom2.posq.w-drr5*dkr) - drr3*atom2.dipole;
real3 fkd = deltaR*(drr3*atom1.posq.w+drr5*dir) - drr3*atom1.dipole;
real3 fip = -deltaR*(prr3*atom2.posq.w-prr5*dkr) - prr3*atom2.dipole;
real3 fkp = deltaR*(prr3*atom1.posq.w+prr5*dir) - prr3*atom1.dipole;
#endif
// increment the field at each site due to this interaction // increment the field at each site due to this interaction
fields[0] = fim-fid; fields[0] = fim-fid;
...@@ -153,29 +164,34 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -153,29 +164,34 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real rr5_2 = 2*rr5; real rr5_2 = 2*rr5;
real3 qDotDelta; real dir = dot(atom1.dipole, deltaR);
qDotDelta.x = deltaR.x*atom2.quadrupoleXX + deltaR.y*atom2.quadrupoleXY + deltaR.z*atom2.quadrupoleXZ; real dkr = dot(atom2.dipole, deltaR);
qDotDelta.y = deltaR.x*atom2.quadrupoleXY + deltaR.y*atom2.quadrupoleYY + deltaR.z*atom2.quadrupoleYZ;
qDotDelta.z = deltaR.x*atom2.quadrupoleXZ + deltaR.y*atom2.quadrupoleYZ + deltaR.z*atom2.quadrupoleZZ; #ifdef INCLUDE_QUADRUPOLES
real3 qi;
real dotdd = dot(deltaR, atom2.dipole); qi.x = atom1.quadrupoleXX*deltaR.x + atom1.quadrupoleXY*deltaR.y + atom1.quadrupoleXZ*deltaR.z;
real dotqd = dot(deltaR, qDotDelta); qi.y = atom1.quadrupoleXY*deltaR.x + atom1.quadrupoleYY*deltaR.y + atom1.quadrupoleYZ*deltaR.z;
qi.z = atom1.quadrupoleXZ*deltaR.x + atom1.quadrupoleYZ*deltaR.y + atom1.quadrupoleZZ*deltaR.z;
real factor = -rr3*atom2.posq.w + rr5*dotdd - rr7*dotqd; real qir = dot(qi, deltaR);
real3 field1 = deltaR*factor - rr3*atom2.dipole + rr5_2*qDotDelta; real3 qk;
qk.x = atom2.quadrupoleXX*deltaR.x + atom2.quadrupoleXY*deltaR.y + atom2.quadrupoleXZ*deltaR.z;
qk.y = atom2.quadrupoleXY*deltaR.x + atom2.quadrupoleYY*deltaR.y + atom2.quadrupoleYZ*deltaR.z;
qk.z = atom2.quadrupoleXZ*deltaR.x + atom2.quadrupoleYZ*deltaR.y + atom2.quadrupoleZZ*deltaR.z;
real qkr = dot(qk, deltaR);
real factor = -rr3*atom2.posq.w + rr5*dkr - rr7*qkr;
real3 field1 = deltaR*factor - rr3*atom2.dipole + rr5_2*qk;
factor = rr3*atom1.posq.w + rr5*dir + rr7*qir;
real3 field2 = deltaR*factor - rr3*atom1.dipole - rr5_2*qi;
#else
real factor = -rr3*atom2.posq.w + rr5*dkr;
real3 field1 = deltaR*factor - rr3*atom2.dipole;
factor = rr3*atom1.posq.w + rr5*dir;
real3 field2 = deltaR*factor - rr3*atom1.dipole;
#endif
fields[0] = dScale*field1; fields[0] = dScale*field1;
fields[1] = pScale*field1; fields[1] = pScale*field1;
qDotDelta.x = deltaR.x*atom1.quadrupoleXX + deltaR.y*atom1.quadrupoleXY + deltaR.z*atom1.quadrupoleXZ;
qDotDelta.y = deltaR.x*atom1.quadrupoleXY + deltaR.y*atom1.quadrupoleYY + deltaR.z*atom1.quadrupoleYZ;
qDotDelta.z = deltaR.x*atom1.quadrupoleXZ + deltaR.y*atom1.quadrupoleYZ + deltaR.z*atom1.quadrupoleZZ;
dotdd = dot(deltaR, atom1.dipole);
dotqd = dot(deltaR, qDotDelta);
factor = rr3*atom1.posq.w + rr5*dotdd + rr7*dotqd;
real3 field2 = deltaR*factor - rr3*atom1.dipole - rr5_2*qDotDelta;
fields[2] = dScale*field2; fields[2] = dScale*field2;
fields[3] = pScale*field2; fields[3] = pScale*field2;
} }
...@@ -200,6 +216,7 @@ __device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3 ...@@ -200,6 +216,7 @@ __device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3
real uyk = atom2.dipole.y; real uyk = atom2.dipole.y;
real uzk = atom2.dipole.z; real uzk = atom2.dipole.z;
#ifdef INCLUDE_QUADRUPOLES
real qxxi = atom1.quadrupoleXX; real qxxi = atom1.quadrupoleXX;
real qxyi = atom1.quadrupoleXY; real qxyi = atom1.quadrupoleXY;
real qxzi = atom1.quadrupoleXZ; real qxzi = atom1.quadrupoleXZ;
...@@ -212,7 +229,20 @@ __device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3 ...@@ -212,7 +229,20 @@ __device__ void computeOneGkInteraction(AtomData& atom1, AtomData& atom2, real3
real qyyk = atom2.quadrupoleYY; real qyyk = atom2.quadrupoleYY;
real qyzk = atom2.quadrupoleYZ; real qyzk = atom2.quadrupoleYZ;
real qzzk = atom2.quadrupoleZZ; real qzzk = atom2.quadrupoleZZ;
#else
real qxxi = 0;
real qxyi = 0;
real qxzi = 0;
real qyyi = 0;
real qyzi = 0;
real qzzi = 0;
real qxxk = 0;
real qxyk = 0;
real qxzk = 0;
real qyyk = 0;
real qyzk = 0;
real qzzk = 0;
#endif
real xr2 = delta.x*delta.x; real xr2 = delta.x*delta.x;
real yr2 = delta.y*delta.y; real yr2 = delta.y*delta.y;
real zr2 = delta.z*delta.z; real zr2 = delta.z*delta.z;
...@@ -438,12 +468,14 @@ extern "C" __global__ void computeFixedField( ...@@ -438,12 +468,14 @@ extern "C" __global__ void computeFixedField(
const unsigned int localAtomIndex = threadIdx.x; const unsigned int localAtomIndex = threadIdx.x;
localData[localAtomIndex].posq = data.posq; localData[localAtomIndex].posq = data.posq;
localData[localAtomIndex].dipole = data.dipole; localData[localAtomIndex].dipole = data.dipole;
#ifdef INCLUDE_QUADRUPOLES
localData[localAtomIndex].quadrupoleXX = data.quadrupoleXX; localData[localAtomIndex].quadrupoleXX = data.quadrupoleXX;
localData[localAtomIndex].quadrupoleXY = data.quadrupoleXY; localData[localAtomIndex].quadrupoleXY = data.quadrupoleXY;
localData[localAtomIndex].quadrupoleXZ = data.quadrupoleXZ; localData[localAtomIndex].quadrupoleXZ = data.quadrupoleXZ;
localData[localAtomIndex].quadrupoleYY = data.quadrupoleYY; localData[localAtomIndex].quadrupoleYY = data.quadrupoleYY;
localData[localAtomIndex].quadrupoleYZ = data.quadrupoleYZ; localData[localAtomIndex].quadrupoleYZ = data.quadrupoleYZ;
localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ; localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ;
#endif
localData[localAtomIndex].thole = data.thole; localData[localAtomIndex].thole = data.thole;
localData[localAtomIndex].damp = data.damp; localData[localAtomIndex].damp = data.damp;
#ifdef USE_GK #ifdef USE_GK
......
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionF1(
#else
computeOneInteractionF1NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, real bn5, float forceFactor,
#ifdef APPLY_SCALE
float dScale, float pScale, float mScale,
#endif
real3& force, real& energy) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
#ifdef APPLY_SCALE
real rr1 = delta.w;
#endif
// set the permanent multipole and induced dipole values;
real ci = atom1.q;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real ck = atom2.q;
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
#ifdef APPLY_SCALE
real offset = 1-mScale;
real rr3 = rr1*rr1*rr1;
real gf4 = 2*(bn2 - 3*offset*rr3*rr1*rr1);
#else
real gf4 = 2*bn2;
#endif
real ftm21 = 0;
real ftm22 = 0;
real ftm23 = 0;
// calculate the scalar products for permanent components
real gl6 = di1*dk1 + di2*dk2 + di3*dk3;
real sc3 = di1*xr + di2*yr + di3*zr;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real gl0 = ci*ck;
real gl1 = ck*sc3 - ci*sc4;
real gl2 = -sc3*sc4;
#ifdef APPLY_SCALE
energy += forceFactor*(-offset*rr1*gl0 + (bn1-offset*rr3)*(gl1+gl6) + (bn2-offset*(3*rr3*rr1*rr1))*gl2);
#else
energy += forceFactor*(bn1*(gl1+gl6) + bn2*gl2);
#endif
real gf1 = bn1*gl0 + bn2*(gl1+gl6) + bn3*gl2;
#ifdef APPLY_SCALE
gf1 -= offset*(rr3*gl0 + (3*rr3*rr1*rr1)*(gl1+gl6) + (15*rr3*rr3*rr1)*gl2);
#endif
ftm21 += gf1*xr;
ftm22 += gf1*yr;
ftm23 += gf1*zr;
#ifdef APPLY_SCALE
real gf2 = -ck*bn1 + sc4*bn2 - offset*(-ck*rr3 + sc4*(3*rr3*rr1*rr1));
#else
real gf2 = -ck*bn1 + sc4*bn2;
#endif
ftm21 += gf2*di1;
ftm22 += gf2*di2;
ftm23 += gf2*di3;
#ifdef APPLY_SCALE
real gf3 = ci*bn1 + sc3*bn2 - offset*(ci*rr3 + sc3*(3*rr3*rr1*rr1));
#else
real gf3 = ci*bn1 + sc3*bn2;
#endif
ftm21 += gf3*dk1;
ftm22 += gf3*dk2;
ftm23 += gf3*dk3;
force.x = ftm21;
force.y = ftm22;
force.z = ftm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionF2(
#else
computeOneInteractionF2NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, float forceFactor,
#ifdef APPLY_SCALE
float dScale, float pScale, float mScale,
#endif
real3& force, real& energy) {
const float uScale = 1;
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
real rr1 = delta.w;
// set the permanent multipole and induced dipole values;
real ci = atom1.q;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
real ratio = RECIP(rr1*damp);
damp = -pgamma*ratio*ratio*ratio;
damp = damp < -50 ? 0 : damp;
}
real scale5 = (damp == 0) ? 1 : (1 - (1-damp)*EXP(damp));
real rr5 = rr1*rr1;
rr5 = 3*rr1*rr5*rr5;
#ifdef APPLY_SCALE
real psc5 = rr5*(1 - scale5*pScale);
real dsc5 = rr5*(1 - scale5*dScale);
real usc5 = rr5*(1 - scale5*uScale);
#else
real psc5 = rr5*(1 - scale5);
#endif
real ftm21 = 0;
real ftm22 = 0;
real ftm23 = 0;
real expdamp = EXP(damp);
real scale3 = (damp == 0) ? 1 : (1 - expdamp);
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
real psc3 = rr3*(1 - scale3*pScale);
real dsc3 = rr3*(1 - scale3*dScale);
real usc3 = rr3*(1 - scale3*uScale);
#else
real psc3 = rr3*(1 - scale3);
#endif
real scale7 = (damp == 0) ? 1 : (1 - (1-damp+0.6f*damp*damp)*expdamp);
#ifdef APPLY_SCALE
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7*pScale);
real dsc7 = (15*rr3*rr3*rr1)*(1 - scale7*dScale);
#else
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7);
#endif
real sc3 = di1*xr + di2*yr + di3*zr;
real gfi3 = ci*bn1 + sc3*bn2;
real prefactor1;
prefactor1 = 0.5f*(ci*psc3 + sc3*psc5 - gfi3);
ftm21 -= prefactor1*atom2.inducedDipole.x;
ftm22 -= prefactor1*atom2.inducedDipole.y;
ftm23 -= prefactor1*atom2.inducedDipole.z;
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(ci*dsc3 + sc3*dsc5 - gfi3);
#endif
ftm21 -= prefactor1*atom2.inducedDipolePolar.x;
ftm22 -= prefactor1*atom2.inducedDipolePolar.y;
ftm23 -= prefactor1*atom2.inducedDipolePolar.z;
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
energy += forceFactor*0.5f*sci4*((psc3-bn1)*ci + (psc5-bn2)*sc3);
real scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2 - usc5);
#else
prefactor1 = 0.5f*(bn2 - psc5);
#endif
ftm21 += prefactor1*((sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x));
ftm22 += prefactor1*((sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y));
ftm23 += prefactor1*((sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z));
#endif
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2*(sci4+scip4) - (sci4*psc5+scip4*dsc5));
#else
sci4 += scip4;
prefactor1 = 0.5f*sci4*(bn2 - psc5);
#endif
ftm21 += prefactor1*di1;
ftm22 += prefactor1*di2;
ftm23 += prefactor1*di3;
#ifdef APPLY_SCALE
real gli1 = -ci*sci4;
real gli2 = -sc3*sci4;
real glip1 = -ci*scip4;
real glip2 = -sc3*scip4;
#else
real gli1 = -ci*sci4;
real gli2 = -sc3*sci4;
#endif
#ifdef APPLY_SCALE
real gfi1 = (bn2*(gli1+glip1) + bn3*(gli2+glip2));
gfi1 -= (rr1*rr1)*(3*(gli1*psc3 + glip1*dsc3) + 5*(gli2*psc5 + glip2*dsc5));
#else
real gfi1 = bn2*gli1 + bn3*gli2;
gfi1 -= (rr1*rr1)*(3*gli1*psc3 + 5*gli2*psc5);
#endif
gfi1 *= 0.5f;
ftm21 += gfi1*xr;
ftm22 += gfi1*yr;
ftm23 += gfi1*zr;
if (damp != 0) {
real expdamp = EXP(damp);
real temp3 = -1.5f*damp*expdamp*rr1*rr1;
real temp5 = -damp;
real temp7 = -0.2f - 0.6f*damp;
real ddsc31 = temp3*xr;
real ddsc32 = temp3*yr;
real ddsc33 = temp3*zr;
real ddsc51 = temp5*ddsc31;
real ddsc52 = temp5*ddsc32;
real ddsc53 = temp5*ddsc33;
real ddsc71 = temp7*ddsc51;
real ddsc72 = temp7*ddsc52;
real ddsc73 = temp7*ddsc53;
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
temp3 = (gli1*pScale + glip1*dScale);
temp5 = (3*rr1*rr1)*(gli2*pScale + glip2*dScale);
#else
temp3 = gli1;
temp5 = (3*rr1*rr1)*gli2;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53);
}
//K
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real ck = atom2.q;
real gfi2 = (-ck*bn1 + sc4*bn2);
prefactor1 = 0.5f*(ck*psc3 - sc4*psc5 + gfi2);
ftm21 += prefactor1*atom1.inducedDipole.x;
ftm22 += prefactor1*atom1.inducedDipole.y;
ftm23 += prefactor1*atom1.inducedDipole.z;
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(ck*dsc3 - sc4*dsc5 + gfi2);
#endif
ftm21 += prefactor1*atom1.inducedDipolePolar.x;
ftm22 += prefactor1*atom1.inducedDipolePolar.y;
ftm23 += prefactor1*atom1.inducedDipolePolar.z;
real sci3 = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
energy += forceFactor*0.5f*sci3*(ck*(bn1-psc3) - sc4*(bn2-psc5));
real scip3 = atom1.inducedDipolePolar.x*xr + atom1.inducedDipolePolar.y*yr + atom1.inducedDipolePolar.z*zr;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2 - usc5);
#else
prefactor1 = 0.5f*(bn2 - psc5);
#endif
ftm21 += prefactor1*(sci3*atom2.inducedDipolePolar.x + scip3*atom2.inducedDipole.x);
ftm22 += prefactor1*(sci3*atom2.inducedDipolePolar.y + scip3*atom2.inducedDipole.y);
ftm23 += prefactor1*(sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
real sci34;
sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
sci34 = (sci3*scip4+scip3*sci4);
#ifdef APPLY_SCALE
gfi1 = sci34*(usc5*(5*rr1*rr1) -bn3);
#else
gfi1 = sci34*(psc5*(5*rr1*rr1) -bn3);
#endif
#else
gfi1 = 0;
#endif
#ifdef APPLY_SCALE
prefactor1 = 0.5f*(bn2*(sci3+scip3) - (sci3*psc5+scip3*dsc5));
#else
sci3 += scip3;
prefactor1 = 0.5f*sci3*(bn2 - psc5);
#endif
ftm21 += prefactor1*dk1;
ftm22 += prefactor1*dk2;
ftm23 += prefactor1*dk3;
#ifdef APPLY_SCALE
real gfi6 = -bn3*(sci3+scip3) + (sci3*psc7+scip3*dsc7);
#else
real gfi6 = sci3*(psc7 - bn3);
#endif
real sci1 = atom1.inducedDipole.x*dk1 + atom1.inducedDipole.y*dk2 + atom1.inducedDipole.z*dk3 + di1*atom2.inducedDipole.x + di2*atom2.inducedDipole.y + di3*atom2.inducedDipole.z;
energy += forceFactor*0.5f*(sci1*(bn1-psc3));
real scip1 = atom1.inducedDipolePolar.x*dk1 + atom1.inducedDipolePolar.y*dk2 + atom1.inducedDipolePolar.z*dk3 + di1*atom2.inducedDipolePolar.x + di2*atom2.inducedDipolePolar.y + di3*atom2.inducedDipolePolar.z;
#ifndef APPLY_SCALE
sci1 += scip1;
#endif
real scip2 = atom1.inducedDipole.x*atom2.inducedDipolePolar.x +
atom1.inducedDipole.y*atom2.inducedDipolePolar.y +
atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom2.inducedDipole.x*atom1.inducedDipolePolar.x +
atom2.inducedDipole.y*atom1.inducedDipolePolar.y +
atom2.inducedDipole.z*atom1.inducedDipolePolar.z;
gli1 = ck*sci3 + sci1;
gli2 = -sci3*sc4;
#ifdef APPLY_SCALE
glip1 = ck*scip3 + scip1;
glip2 = -scip3*sc4;
#endif
#ifdef APPLY_SCALE
gfi1 += (bn2*(gli1+glip1) + bn3*(gli2+glip2));
gfi1 -= (rr1*rr1)*(3*(gli1*psc3 + glip1*dsc3) + 5*(gli2*psc5 + glip2*dsc5));
#else
gfi1 += (bn2*gli1 + bn3*gli2);
gfi1 -= (rr1*rr1)*(3*gli1*psc3 + 5*gli2*psc5);
#endif
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
gfi1 += scip2*(bn2 - (3*rr1*rr1)*usc3);
#else
gfi1 += scip2*(bn2 - (3*rr1*rr1)*psc3);
#endif
#endif
gfi1 *= 0.5f;
ftm21 += gfi1*xr;
ftm22 += gfi1*yr;
ftm23 += gfi1*zr;
if (damp != 0) {
real expdamp = EXP(damp);
real temp3 = -1.5f*damp*expdamp*rr1*rr1;
real temp5 = -damp;
real temp7 = -0.2f - 0.6f*damp;
real ddsc31 = temp3*xr;
real ddsc32 = temp3*yr;
real ddsc33 = temp3*zr;
real ddsc51 = temp5*ddsc31;
real ddsc52 = temp5*ddsc32;
real ddsc53 = temp5*ddsc33;
real ddsc71 = temp7*ddsc51;
real ddsc72 = temp7*ddsc52;
real ddsc73 = temp7*ddsc53;
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
temp3 = gli1*pScale + glip1*dScale;
temp5 = (3*rr1*rr1)*(gli2*pScale + glip2*dScale);
#else
temp3 = gli1;
temp5 = (3*rr1*rr1)*gli2;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53);
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
temp3 = uScale*scip2;
temp5 = -(3*rr1*rr1)*uScale*sci34;
#else
temp3 = scip2;
temp5 = -(3*rr1*rr1)*sci34;
#endif
ftm21 -= rr3*(temp3*ddsc31 + temp5*ddsc51);
ftm22 -= rr3*(temp3*ddsc32 + temp5*ddsc52);
ftm23 -= rr3*(temp3*ddsc33 + temp5*ddsc53);
#endif
}
force.x += ftm21;
force.y += ftm22;
force.z += ftm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionT1(
#else
computeOneInteractionT1NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn
#ifdef APPLY_SCALE
, float dScale, float pScale, float mScale
#endif
) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
#ifdef APPLY_SCALE
real rr1 = delta.w;
#endif
// set the permanent multipole and induced dipole values;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real ck = atom2.q;
real dk1 = atom2.dipole.x;
real dk2 = atom2.dipole.y;
real dk3 = atom2.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
real bn4 = bn.w;
// apply Thole polarization damping to scale factors
#ifdef APPLY_SCALE
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real rr5 = 3*rr3*rr2;
real rr7 = 5*rr5*rr2;
real rr9 = 7*rr7*rr2;
real scale = 1-mScale;
real prefactor = scale*rr3 - bn1;
#else
real prefactor = -bn1;
#endif
real dixdk1 = di2*dk3 - di3*dk2;
real ttm21 = prefactor*dixdk1;
real dixdk2 = di3*dk1 - di1*dk3;
real ttm22 = prefactor*dixdk2;
real dixdk3 = di1*dk2 - di2*dk1;
real ttm23 = prefactor*dixdk3;
real sc4 = dk1*xr + dk2*yr + dk3*zr;
real sc6 = 0;
real gf2 = -ck*bn1 + sc4*bn2;
#ifdef APPLY_SCALE
real gfr2 = -ck*rr3 + sc4*rr5;
prefactor = (gf2 - scale*gfr2);
#else
prefactor = gf2;
#endif
ttm21 += prefactor*(di2*zr - di3*yr);
ttm22 += prefactor*(di3*xr - di1*zr);
ttm23 += prefactor*(di1*yr - di2*xr);
atom1.torque.x += ttm21;
atom1.torque.y += ttm22;
atom1.torque.z += ttm23;
}
__device__ void
#ifdef APPLY_SCALE
computeOneInteractionT2(
#else
computeOneInteractionT2NoScale(
#endif
AtomData& atom1, volatile AtomData& atom2, const real4 delta, const real4 bn
#ifdef APPLY_SCALE
, float dScale, float pScale, float mScale
#endif
) {
real xr = delta.x;
real yr = delta.y;
real zr = delta.z;
real rr1 = delta.w;
// set the permanent multipole and induced dipole values;
real di1 = atom1.dipole.x;
real di2 = atom1.dipole.y;
real di3 = atom1.dipole.z;
real bn1 = bn.x;
real bn2 = bn.y;
real bn3 = bn.z;
// apply Thole polarization damping to scale factors
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
real ratio = RECIP(rr1*damp);
damp = -pgamma*ratio*ratio*ratio;
real expdamp = EXP(damp);
scale3 = 1 - expdamp;
scale5 = 1 - (1-damp)*expdamp;
scale7 = 1 - (1-damp+0.6f*damp*damp)*expdamp;
}
real rr3 = rr1*rr1*rr1;
#ifdef APPLY_SCALE
real dsc3 = rr3*(1 - scale3*dScale);
real dsc5 = (3*rr3*rr1*rr1)* (1 - scale5*dScale);
real dsc7 = (15*rr3*rr3*rr1)*(1 - scale7*dScale);
real psc3 = rr3*(1 - scale3*pScale);
real psc5 = (3*rr3*rr1*rr1)*(1 - scale5*pScale);
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7*pScale);
#else
real psc3 = rr3*(1 - scale3);
real psc5 = (3*rr3*rr1*rr1)*(1 - scale5);
real psc7 = (15*rr3*rr3*rr1)*(1 - scale7);
#endif
real prefactor1 = 0.5f*(psc3 - bn1);
#ifdef APPLY_SCALE
real prefactor2 = 0.5f*(dsc3 - bn1);
#endif
real dixuk1 = di2*atom2.inducedDipole.z - di3*atom2.inducedDipole.y;
real dixukp1 = di2*atom2.inducedDipolePolar.z - di3*atom2.inducedDipolePolar.y;
#ifdef APPLY_SCALE
real ttm2i1 = prefactor1*dixuk1 + prefactor2*dixukp1;
#else
real ttm2i1 = prefactor1*(dixuk1 + dixukp1);
#endif
real dixuk2 = di3*atom2.inducedDipole.x - di1*atom2.inducedDipole.z;
real dixukp2 = di3*atom2.inducedDipolePolar.x - di1*atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
real ttm2i2 = prefactor1*dixuk2 + prefactor2*dixukp2;
#else
real ttm2i2 = prefactor1*(dixuk2 + dixukp2);
#endif
real dixuk3 = di1*atom2.inducedDipole.y - di2*atom2.inducedDipole.x;
real dixukp3 = di1*atom2.inducedDipolePolar.y - di2*atom2.inducedDipolePolar.x;
#ifdef APPLY_SCALE
real ttm2i3 = prefactor1*dixuk3 + prefactor2*dixukp3;
#else
real ttm2i3 = prefactor1*(dixuk3 + dixukp3);
#endif
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
real scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
real gti2 = bn2*(sci4+scip4);
#ifdef APPLY_SCALE
real gtri2 = (sci4*psc5+scip4*dsc5);
#else
real gtri2 = psc5*(sci4+scip4);
#endif
prefactor1 = 0.5f*(gti2 - gtri2);
ttm2i1 += prefactor1*(di2*zr - di3*yr);
ttm2i2 += prefactor1*(di3*xr - di1*zr);
ttm2i3 += prefactor1*(di1*yr - di2*xr);
atom1.torque.x += ttm2i1;
atom1.torque.y += ttm2i2;
atom1.torque.z += ttm2i3;
}
...@@ -2,9 +2,12 @@ ...@@ -2,9 +2,12 @@
typedef struct { typedef struct {
real3 pos, force, torque, dipole, inducedDipole, inducedDipolePolar; real3 pos, force, torque, dipole, inducedDipole, inducedDipolePolar;
real q, quadrupoleXX, quadrupoleXY, quadrupoleXZ; real q;
real quadrupoleYY, quadrupoleYZ; float thole, damp;
float thole, damp, padding; #ifdef INCLUDE_QUADRUPOLES
real quadrupoleXX, quadrupoleXY, quadrupoleXZ, quadrupoleYY, quadrupoleYZ;
float padding;
#endif
} AtomData; } AtomData;
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, real bn5, float forceFactor, float dScale, float pScale, float mScale, real3& force, real& energy); __device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, real4 delta, real4 bn, real bn5, float forceFactor, float dScale, float pScale, float mScale, real3& force, real& energy);
...@@ -24,11 +27,13 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -24,11 +27,13 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.dipole.x = labFrameDipole[atom*3]; data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1]; data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2]; data.dipole.z = labFrameDipole[atom*3+2];
#ifdef INCLUDE_QUADRUPOLES
data.quadrupoleXX = labFrameQuadrupole[atom*5]; data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1]; data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2]; data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3]; data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4]; data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
#endif
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1]; data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2]; data.inducedDipole.z = inducedDipole[atom*3+2];
...@@ -158,12 +163,16 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) { ...@@ -158,12 +163,16 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
real fterm = -EWALD_ALPHA/SQRT_PI; real fterm = -EWALD_ALPHA/SQRT_PI;
real cii = atom1.q*atom1.q; real cii = atom1.q*atom1.q;
real dii = dot(atom1.dipole, atom1.dipole); real dii = dot(atom1.dipole, atom1.dipole);
#ifdef INCLUDE_QUADRUPOLES
real qii = 2*(atom1.quadrupoleXX*atom1.quadrupoleXX + real qii = 2*(atom1.quadrupoleXX*atom1.quadrupoleXX +
atom1.quadrupoleYY*atom1.quadrupoleYY + atom1.quadrupoleYY*atom1.quadrupoleYY +
atom1.quadrupoleXX*atom1.quadrupoleYY + atom1.quadrupoleXX*atom1.quadrupoleYY +
atom1.quadrupoleXY*atom1.quadrupoleXY + atom1.quadrupoleXY*atom1.quadrupoleXY +
atom1.quadrupoleXZ*atom1.quadrupoleXZ + atom1.quadrupoleXZ*atom1.quadrupoleXZ +
atom1.quadrupoleYZ*atom1.quadrupoleYZ); atom1.quadrupoleYZ*atom1.quadrupoleYZ);
#else
real qii = 0;
#endif
real uii = dot(atom1.dipole, atom1.inducedDipole); real uii = dot(atom1.dipole, atom1.inducedDipole);
real selfEnergy = (cii + term*(dii/3 + 2*term*qii/5)); real selfEnergy = (cii + term*(dii/3 + 2*term*qii/5));
selfEnergy += term*uii/3; selfEnergy += term*uii/3;
...@@ -216,11 +225,13 @@ extern "C" __global__ void computeElectrostatics( ...@@ -216,11 +225,13 @@ extern "C" __global__ void computeElectrostatics(
localData[threadIdx.x].pos = data.pos; localData[threadIdx.x].pos = data.pos;
localData[threadIdx.x].q = data.q; localData[threadIdx.x].q = data.q;
localData[threadIdx.x].dipole = data.dipole; localData[threadIdx.x].dipole = data.dipole;
#ifdef INCLUDE_QUADRUPOLES
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX; localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY; localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ; localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY; localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ; localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
#endif
localData[threadIdx.x].inducedDipole = data.inducedDipole; localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar; localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; localData[threadIdx.x].thole = data.thole;
......
...@@ -50,6 +50,8 @@ ...@@ -50,6 +50,8 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std;
const double TOL = 1e-4; const double TOL = 1e-4;
extern "C" void registerAmoebaCudaKernelFactories(); extern "C" void registerAmoebaCudaKernelFactories();
...@@ -2893,6 +2895,75 @@ static void testMultipoleGridPotential( FILE* log ) { ...@@ -2893,6 +2895,75 @@ static void testMultipoleGridPotential( FILE* log ) {
} }
/**
* Test whether the alternate kernels with no quadrupoles work correctly.
*/
static void testNoQuadrupoles(bool usePme) {
string testName = "testNoQuadrupoles";
int inputPmeGridDimension = 10;
double cutoff = 0.3;
System system;
AmoebaMultipoleForce* amoebaMultipoleForce = new AmoebaMultipoleForce();;
setupMultipoleAmmonia(system, amoebaMultipoleForce, usePme ? AmoebaMultipoleForce::PME : AmoebaMultipoleForce::NoCutoff, AmoebaMultipoleForce::Direct,
cutoff, inputPmeGridDimension);
// First compute forces with quadrupoles. This ensures they will be included in the kernel.
LangevinIntegrator integrator(0.0, 0.1, 0.01);
Context context(system, integrator, Platform::getPlatformByName("CUDA"));
vector<Vec3> forces1;
double energy1;
getForcesEnergyMultipoleAmmonia(context, forces1, energy1);
// Now set all quadrupoles to zero and recalculate.
for (int i = 0; i < system.getNumParticles(); i++) {
double charge, thole, damping, polarity;
int axisType, atomX, atomY, atomZ;
vector<double> dipole, quadrupole;
amoebaMultipoleForce->getMultipoleParameters(i, charge, dipole, quadrupole, axisType, atomZ, atomX, atomY, thole, damping, polarity);
for (int j = 0; j < (int) quadrupole.size(); j++)
quadrupole[j] = 0;
amoebaMultipoleForce->setMultipoleParameters(i, charge, dipole, quadrupole, axisType, atomZ, atomX, atomY, thole, damping, polarity);
}
amoebaMultipoleForce->updateParametersInContext(context);
vector<Vec3> forces2;
double energy2;
getForcesEnergyMultipoleAmmonia(context, forces2, energy2);
ASSERT(energy1 != energy2);
// Create a new context with no quadrupoles from the beginning.
LangevinIntegrator integrator2(0.0, 0.1, 0.01);
Context context2(system, integrator2, Platform::getPlatformByName("CUDA"));
vector<Vec3> forces3;
double energy3;
getForcesEnergyMultipoleAmmonia(context2, forces3, energy3);
double tolerance = 1e-5;
compareForcesEnergy(testName, energy2, energy3, forces2, forces3, tolerance, NULL);
// If we try to set a quadrupole, that should produce and exception.
double charge, thole, damping, polarity;
int axisType, atomX, atomY, atomZ;
vector<double> dipole, quadrupole;
amoebaMultipoleForce->getMultipoleParameters(0, charge, dipole, quadrupole, axisType, atomZ, atomX, atomY, thole, damping, polarity);
quadrupole[0] = 1.0;
amoebaMultipoleForce->setMultipoleParameters(0, charge, dipole, quadrupole, axisType, atomZ, atomX, atomY, thole, damping, polarity);
bool threwException = false;
try {
amoebaMultipoleForce->updateParametersInContext(context2);
}
catch (OpenMMException& ex) {
threwException = true;
}
ASSERT(threwException);
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
std::cout << "TestCudaAmoebaMultipoleForce running test..." << std::endl; std::cout << "TestCudaAmoebaMultipoleForce running test..." << std::endl;
...@@ -2940,6 +3011,9 @@ int main(int argc, char* argv[]) { ...@@ -2940,6 +3011,9 @@ int main(int argc, char* argv[]) {
// large box of water // large box of water
testPMEMutualPolarizationLargeWater( log ); testPMEMutualPolarizationLargeWater( log );
testNoQuadrupoles(false);
testNoQuadrupoles(true);
} catch(const std::exception& e) { } catch(const std::exception& e) {
std::cout << "exception: " << e.what() << std::endl; std::cout << "exception: " << e.what() << std::endl;
......
#--------------------------------------------------- #---------------------------------------------------
# OpenMM Reference Amoeba Implementation # OpenMM Reference Amoeba Implementation
# #
# Creates OpenMM library, base name=OpenMMAmoebaReference. # Creates OpenMMAmoebaReference library.
# Default libraries are shared & optimized. Variants
# are created for debug (_d).
# #
# Windows: # Windows:
# OpenMMAmoebaReference[_d].dll # OpenMMAmoebaReference.dll
# OpenMMAmoebaReference[_d].lib # OpenMMAmoebaReference.lib
# Unix: # Unix:
# libOpenMMAmoebaReference[_d].so # libOpenMMAmoebaReference.so
#---------------------------------------------------- #----------------------------------------------------
# The source is organized into subdirectories, but we handle them all from # The source is organized into subdirectories, but we handle them all from
...@@ -23,17 +21,6 @@ SET(OPENMMAMOEBAREFERENCE_LIBRARY_NAME OpenMMAmoebaReference) ...@@ -23,17 +21,6 @@ SET(OPENMMAMOEBAREFERENCE_LIBRARY_NAME OpenMMAmoebaReference)
SET(SHARED_TARGET ${OPENMMAMOEBAREFERENCE_LIBRARY_NAME}) SET(SHARED_TARGET ${OPENMMAMOEBAREFERENCE_LIBRARY_NAME})
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (MSVC)
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (MSVC)
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are # These are all the places to search for header files which are
# to be part of the API. # to be part of the API.
SET(API_INCLUDE_DIRS) # start empty SET(API_INCLUDE_DIRS) # start empty
...@@ -81,13 +68,8 @@ INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/reference/src/SimTKRefe ...@@ -81,13 +68,8 @@ INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/reference/src/SimTKRefe
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME})
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${SHARED_AMOEBA_TARGET})
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${SHARED_AMOEBA_TARGET} optimized ${SHARED_AMOEBA_TARGET})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_SHARED_LIBRARY") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_SHARED_LIBRARY")
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}")
......
#--------------------------------------------------- #---------------------------------------------------
# OpenMM CPU PME Plugin # OpenMM CPU PME Plugin
# #
# Creates plugin library, base name=OpenMMPME. # Creates OpenMMPME plugin library.
# Default libraries are shared & optimized.
# #
# Windows: # Windows:
# OpenMMPME[_d].dll # OpenMMPME.dll
# OpenMMPME[_d].lib # OpenMMPME.lib
# OpenMMPME_static.lib
# Unix: # Unix:
# libOpenMMPME[_d].so # libOpenMMPME.so
# libOpenMMPME_static.a
#---------------------------------------------------- #----------------------------------------------------
# The source is organized into subdirectories, but we handle them all from # The source is organized into subdirectories, but we handle them all from
...@@ -24,18 +25,6 @@ SET(OPENMMPME_LIBRARY_NAME OpenMMPME) ...@@ -24,18 +25,6 @@ SET(OPENMMPME_LIBRARY_NAME OpenMMPME)
SET(SHARED_TARGET ${OPENMMPME_LIBRARY_NAME}) SET(SHARED_TARGET ${OPENMMPME_LIBRARY_NAME})
SET(STATIC_TARGET ${OPENMMPME_LIBRARY_NAME}_static) SET(STATIC_TARGET ${OPENMMPME_LIBRARY_NAME}_static)
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF (MSVC)
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
ENDIF (MSVC)
# But on Unix or Cygwin we have to add the suffix manually
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_TARGET ${SHARED_TARGET}_d)
SET(STATIC_TARGET ${STATIC_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# These are all the places to search for header files which are # These are all the places to search for header files which are
# to be part of the API. # to be part of the API.
...@@ -77,12 +66,7 @@ INCLUDE_DIRECTORIES(${FFTW_INCLUDES}) ...@@ -77,12 +66,7 @@ INCLUDE_DIRECTORIES(${FFTW_INCLUDES})
# Build the shared plugin library. # Build the shared plugin library.
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB} ${FFTW_LIBRARY})
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB} ${FFTW_LIBRARY})
IF (FFTW_THREADS_LIBRARY) IF (FFTW_THREADS_LIBRARY)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${FFTW_THREADS_LIBRARY}) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${FFTW_THREADS_LIBRARY})
ENDIF (FFTW_THREADS_LIBRARY) ENDIF (FFTW_THREADS_LIBRARY)
...@@ -94,12 +78,7 @@ INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET}) ...@@ -94,12 +78,7 @@ INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
IF(OPENMM_BUILD_STATIC_LIB) IF(OPENMM_BUILD_STATIC_LIB)
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES}) ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB} ${FFTW_LIBRARY})
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB} ${FFTW_LIBRARY})
IF (FFTW_THREADS_LIBRARY) IF (FFTW_THREADS_LIBRARY)
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${FFTW_THREADS_LIBRARY}) TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${FFTW_THREADS_LIBRARY})
ENDIF (FFTW_THREADS_LIBRARY) ENDIF (FFTW_THREADS_LIBRARY)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment