Commit 2e451b9d authored by Peter Eastman
Browse files

Deleted the old CUDA platform

parent 352e2fc7
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "AmoebaCudaKernelFactory.h"
#include "AmoebaCudaKernels.h"
#include "CudaPlatform.h"
#include "AmoebaCudaData.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
// Exported C entry point named registerPlatforms. The body is empty: nothing is
// registered from this hook. (A CudaPlatform is registered, when needed, by
// registerAmoebaCudaKernelFactories() further down in this file.)
// NOTE(review): presumably invoked by the OpenMM plugin loader - confirm.
extern "C" void registerPlatforms() {
}
// Register an AmoebaCudaKernelFactory for every AMOEBA kernel name on each
// registered platform whose name is "Cuda". A separate factory instance is
// allocated for each matching platform; platforms with any other name are
// left untouched.
extern "C" OPENMMCUDA_EXPORT void registerKernelFactories() {
    // Kernel names handled by the factory, in registration order.
    // (Registration of CalcAmoebaForcesAndEnergyKernel::Name() is disabled.)
    const std::string kernelNames[] = {
        CalcAmoebaBondForceKernel::Name(),
        CalcAmoebaAngleForceKernel::Name(),
        CalcAmoebaInPlaneAngleForceKernel::Name(),
        CalcAmoebaPiTorsionForceKernel::Name(),
        CalcAmoebaStretchBendForceKernel::Name(),
        CalcAmoebaOutOfPlaneBendForceKernel::Name(),
        CalcAmoebaTorsionTorsionForceKernel::Name(),
        CalcAmoebaMultipoleForceKernel::Name(),
        CalcAmoebaGeneralizedKirkwoodForceKernel::Name(),
        CalcAmoebaVdwForceKernel::Name(),
        CalcAmoebaWcaDispersionForceKernel::Name()
    };
    const int numKernelNames = static_cast<int>(sizeof(kernelNames)/sizeof(kernelNames[0]));

    for (int platformIndex = 0; platformIndex < Platform::getNumPlatforms(); platformIndex++) {
        Platform& candidate = Platform::getPlatform(platformIndex);
        if (candidate.getName() != "Cuda")
            continue;

        // One factory object is shared by all kernel names of this platform.
        AmoebaCudaKernelFactory* factory = new AmoebaCudaKernelFactory();
        for (int nameIndex = 0; nameIndex < numKernelNames; nameIndex++)
            candidate.registerKernelFactory(kernelNames[nameIndex], factory);
    }
}
// Make sure a platform named "Cuda" is registered (creating a CudaPlatform if
// none exists and gpuIsAvailable() reports true), then register the AMOEBA
// kernel factories via registerKernelFactories().
extern "C" OPENMMCUDA_EXPORT void registerAmoebaCudaKernelFactories( void ) {
    // Scan the registered platforms, stopping at the first one named "Cuda".
    bool cudaPlatformFound = false;
    for (int platformIndex = 0; platformIndex < Platform::getNumPlatforms(); platformIndex++) {
        if (Platform::getPlatform(platformIndex).getName() == "Cuda") {
            cudaPlatformFound = true;
            break;
        }
    }

    // Only create the platform when it is missing and a GPU is reported available.
    if (!cudaPlatformFound && gpuIsAvailable())
        Platform::registerPlatform(new CudaPlatform());

    registerKernelFactories();
}
// File-local registry mapping each ContextImpl to the AmoebaCudaData created for
// it. Entries are added in AmoebaCudaKernelFactory::createKernelImpl(), read by
// getAmoebaCudaData() and erased by removeAmoebaCudaDataFromContextMap().
static std::map<ContextImpl*, AmoebaCudaData*> contextToAmoebaDataMap;
// Return the AmoebaCudaData registered for 'context' in contextToAmoebaDataMap,
// as an untyped pointer; returns NULL when the context has no entry.
extern "C" void* getAmoebaCudaData( ContextImpl& context ) {
    std::map<ContextImpl*, AmoebaCudaData*>::const_iterator entry = contextToAmoebaDataMap.find(&context);
    if (entry != contextToAmoebaDataMap.end())
        return static_cast<void*>(entry->second);
    return NULL;
}
// Erase the contextToAmoebaDataMap entry keyed by 'inputContext' (a ContextImpl*
// passed as void*). Only the map entry is removed; the AmoebaCudaData object it
// referred to is not deleted here. Erasing an absent key is a no-op.
extern "C" void removeAmoebaCudaDataFromContextMap( void* inputContext ) {
    contextToAmoebaDataMap.erase(static_cast<ContextImpl*>(inputContext));
}
// Create the CUDA implementation of the AMOEBA kernel called 'name'.
//
// All kernels created for one ContextImpl share a single AmoebaCudaData object:
// it is created on the first request for that context, stored in
// contextToAmoebaDataMap, and reused afterwards.
//
// Throws OpenMMException if 'name' is not one of the supported kernel names.
KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
    CudaPlatform::PlatformData& cudaPlatformData = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());

    // Fetch the per-context AmoebaCudaData, creating and registering it if absent.
    AmoebaCudaData* amoebaCudaData;
    std::map<ContextImpl*, AmoebaCudaData*>::const_iterator existing = contextToAmoebaDataMap.find(&context);
    if (existing != contextToAmoebaDataMap.end()) {
        amoebaCudaData = existing->second;
    }
    else {
        amoebaCudaData = new AmoebaCudaData( cudaPlatformData );
        contextToAmoebaDataMap[&context] = amoebaCudaData;
        //amoebaCudaData->setLog( stderr );
        amoebaCudaData->setContextImpl( static_cast<void*>(&context) );
    }

    // Dispatch on the kernel name.
    if (name == CalcAmoebaBondForceKernel::Name()) {
        return new CudaCalcAmoebaBondForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaAngleForceKernel::Name()) {
        return new CudaCalcAmoebaAngleForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaInPlaneAngleForceKernel::Name()) {
        return new CudaCalcAmoebaInPlaneAngleForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaPiTorsionForceKernel::Name()) {
        return new CudaCalcAmoebaPiTorsionForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaStretchBendForceKernel::Name()) {
        return new CudaCalcAmoebaStretchBendForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaOutOfPlaneBendForceKernel::Name()) {
        return new CudaCalcAmoebaOutOfPlaneBendForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaTorsionTorsionForceKernel::Name()) {
        return new CudaCalcAmoebaTorsionTorsionForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaMultipoleForceKernel::Name()) {
        return new CudaCalcAmoebaMultipoleForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name()) {
        return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaVdwForceKernel::Name()) {
        return new CudaCalcAmoebaVdwForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }
    if (name == CalcAmoebaWcaDispersionForceKernel::Name()) {
        return new CudaCalcAmoebaWcaDispersionForceKernel(name, platform, *amoebaCudaData, context.getSystem());
    }

    const std::string message = std::string("Tried to create kernel with illegal kernel name '") + name + "'";
    throw OpenMMException(message.c_str());
}
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "AmoebaCudaKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "kernels/amoebaGpuTypes.h"
#include "kernels/cudaKernels.h"
#include "kernels/amoebaCudaKernels.h"
#include "openmm/internal/AmoebaVdwForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h"
#include "openmm/internal/AmoebaWcaDispersionForceImpl.h"
#include "openmm/internal/AmoebaTorsionTorsionForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "CudaForceInfo.h"
#include <stdio.h>
#include <cmath>
#ifdef _MSC_VER
#include <windows.h>
#endif
extern "C" int gpuSetConstants( gpuContext gpu );
using namespace OpenMM;
using namespace std;
/* -------------------------------------------------------------------------- *
* Calculates bonded forces *
* -------------------------------------------------------------------------- */
// Initialize the GPU through 'data' and launch kCalculateAmoebaLocalForces on
// its amoebaGpuContext. Shared by the execute() methods of the bonded kernels.
static void computeAmoebaLocalForces( AmoebaCudaData& data ) {
    amoebaGpuContext amoebaGpu = data.getAmoebaGpu();

    // Debug trace; disabled by the constant 0 in the condition.
    if (0 && data.getLog()) {
        (void) fprintf(data.getLog(), "computeAmoebaLocalForces\n");
        (void) fflush(data.getLog());
    }

    data.initializeGpu();
    kCalculateAmoebaLocalForces(amoebaGpu);
}
/* -------------------------------------------------------------------------- *
* AmoebaBondForce *
* -------------------------------------------------------------------------- */
// CudaForceInfo adapter that exposes the bonds of an AmoebaBondForce as
// two-particle groups.
class CudaCalcAmoebaBondForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaBondForce& force) : force(force) {
    }

    // One group per bond.
    int getNumParticleGroups() {
        return force.getNumBonds();
    }

    // Fill 'particles' with the two particle indices of bond 'index'.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atomA, atomB;
        double bondLength, bondK;
        force.getBondParameters(index, atomA, atomB, bondLength, bondK);
        const int members[] = { atomA, atomB };
        particles.assign(members, members + 2);
    }

    // Two bonds are identical when their length and k parameters both match;
    // the particle indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int atomA, atomB;
        double lengthFirst, kFirst, lengthSecond, kSecond;
        force.getBondParameters(group1, atomA, atomB, lengthFirst, kFirst);
        force.getBondParameters(group2, atomA, atomB, lengthSecond, kSecond);
        if (lengthFirst != lengthSecond)
            return false;
        return kFirst == kSecond;
    }

private:
    const AmoebaBondForce& force;
};
// Construct the kernel, keeping references to the shared AmoebaCudaData and the
// System, and bump the kernel count held by the AmoebaCudaData.
CudaCalcAmoebaBondForceKernel::CudaCalcAmoebaBondForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system)
    : CalcAmoebaBondForceKernel(name, platform),
      data(data),
      system(system)
{
    data.incrementKernelCount();
}
// Destructor: lowers the kernel count held by the shared AmoebaCudaData.
CudaCalcAmoebaBondForceKernel::~CudaCalcAmoebaBondForceKernel()
{
    data.decrementKernelCount();
}
// Copy the bond parameters of 'force' (converted to float) to the GPU context
// and append a ForceInfo describing the force to the GPU context's force list.
// Also records this kernel in AmoebaCudaData as the local-forces kernel.
void CudaCalcAmoebaBondForceKernel::initialize(const System& system, const AmoebaBondForce& force) {
    data.setAmoebaLocalForcesKernel(this);

    numBonds = force.getNumBonds();
    std::vector<int>   particle1(numBonds), particle2(numBonds);
    std::vector<float> length(numBonds), quadratic(numBonds);

    for (int bond = 0; bond < numBonds; ++bond) {
        double bondLength, bondK;
        force.getBondParameters(bond, particle1[bond], particle2[bond], bondLength, bondK);
        length[bond]    = static_cast<float>(bondLength);
        quadratic[bond] = static_cast<float>(bondK);
    }

    // Global anharmonic coefficients shared by all bonds.
    const float globalCubic   = static_cast<float>(force.getAmoebaGlobalBondCubic());
    const float globalQuartic = static_cast<float>(force.getAmoebaGlobalBondQuartic());

    gpuSetAmoebaBondParameters(data.getAmoebaGpu(), particle1, particle2, length, quadratic,
                               globalCubic, globalQuartic);
    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
// Launch the combined local-force computation, but only when this kernel is the
// one currently recorded in AmoebaCudaData as the local-forces kernel.
// Always returns 0.0.
// NOTE(review): presumably this keeps the shared GPU computation from running
// once per bonded force - confirm against AmoebaCudaData.
double CudaCalcAmoebaBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (data.getAmoebaLocalForcesKernel() != this)
        return 0.0;
    computeAmoebaLocalForces(data);
    return 0.0;
}
/* -------------------------------------------------------------------------- *
* AmoebaAngleForce *
* -------------------------------------------------------------------------- */
// CudaForceInfo adapter that exposes the angles of an AmoebaAngleForce as
// three-particle groups.
class CudaCalcAmoebaAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaAngleForce& force) : force(force) {
    }

    // One group per angle.
    int getNumParticleGroups() {
        return force.getNumAngles();
    }

    // Fill 'particles' with the three particle indices of angle 'index'.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atomA, atomB, atomC;
        double angleValue, kValue;
        force.getAngleParameters(index, atomA, atomB, atomC, angleValue, kValue);
        const int members[] = { atomA, atomB, atomC };
        particles.assign(members, members + 3);
    }

    // Two angles are identical when their angle and k parameters both match;
    // the particle indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int atomA, atomB, atomC;
        double angleFirst, kFirst, angleSecond, kSecond;
        force.getAngleParameters(group1, atomA, atomB, atomC, angleFirst, kFirst);
        force.getAngleParameters(group2, atomA, atomB, atomC, angleSecond, kSecond);
        if (angleFirst != angleSecond)
            return false;
        return kFirst == kSecond;
    }

private:
    const AmoebaAngleForce& force;
};
// Construct the kernel, keeping references to the shared AmoebaCudaData and the
// System, and bump the kernel count held by the AmoebaCudaData.
CudaCalcAmoebaAngleForceKernel::CudaCalcAmoebaAngleForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system)
    : CalcAmoebaAngleForceKernel(name, platform),
      data(data),
      system(system)
{
    data.incrementKernelCount();
}
// Destructor: lowers the kernel count held by the shared AmoebaCudaData.
CudaCalcAmoebaAngleForceKernel::~CudaCalcAmoebaAngleForceKernel()
{
    data.decrementKernelCount();
}
// Copy the angle parameters of 'force' (converted to float) to the GPU context
// and append a ForceInfo describing the force to the GPU context's force list.
// Also records this kernel in AmoebaCudaData as the local-forces kernel.
void CudaCalcAmoebaAngleForceKernel::initialize(const System& system, const AmoebaAngleForce& force) {
    data.setAmoebaLocalForcesKernel(this);

    numAngles = force.getNumAngles();
    std::vector<int>   particle1(numAngles), particle2(numAngles), particle3(numAngles);
    std::vector<float> angle(numAngles), k(numAngles);

    for (int index = 0; index < numAngles; ++index) {
        int atomA, atomB, atomC;
        double idealAngle, quadraticK;
        force.getAngleParameters(index, atomA, atomB, atomC, idealAngle, quadraticK);
        particle1[index] = atomA;
        particle2[index] = atomB;
        particle3[index] = atomC;
        angle[index]     = static_cast<float>(idealAngle);
        k[index]         = static_cast<float>(quadraticK);
    }

    // Global anharmonic coefficients shared by all angles.
    const float globalCubic   = static_cast<float>(force.getAmoebaGlobalAngleCubic());
    const float globalQuartic = static_cast<float>(force.getAmoebaGlobalAngleQuartic());
    const float globalPentic  = static_cast<float>(force.getAmoebaGlobalAnglePentic());
    const float globalSextic  = static_cast<float>(force.getAmoebaGlobalAngleSextic());

    gpuSetAmoebaAngleParameters(data.getAmoebaGpu(), particle1, particle2, particle3, angle, k,
                                globalCubic, globalQuartic, globalPentic, globalSextic);
    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
// Launch the combined local-force computation, but only when this kernel is the
// one currently recorded in AmoebaCudaData as the local-forces kernel.
// Always returns 0.0.
double CudaCalcAmoebaAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (data.getAmoebaLocalForcesKernel() != this)
        return 0.0;
    computeAmoebaLocalForces(data);
    return 0.0;
}
/* -------------------------------------------------------------------------- *
* AmoebaInPlaneAngleForce *
* -------------------------------------------------------------------------- */
// CudaForceInfo adapter that exposes the angles of an AmoebaInPlaneAngleForce
// as four-particle groups.
class CudaCalcAmoebaInPlaneAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaInPlaneAngleForce& force) : force(force) {
    }

    // One group per in-plane angle.
    int getNumParticleGroups() {
        return force.getNumAngles();
    }

    // Fill 'particles' with the four particle indices of angle 'index'.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atomA, atomB, atomC, atomD;
        double angleValue, kValue;
        force.getAngleParameters(index, atomA, atomB, atomC, atomD, angleValue, kValue);
        const int members[] = { atomA, atomB, atomC, atomD };
        particles.assign(members, members + 4);
    }

    // Two angles are identical when their angle and k parameters both match;
    // the particle indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int atomA, atomB, atomC, atomD;
        double angleFirst, kFirst, angleSecond, kSecond;
        force.getAngleParameters(group1, atomA, atomB, atomC, atomD, angleFirst, kFirst);
        force.getAngleParameters(group2, atomA, atomB, atomC, atomD, angleSecond, kSecond);
        if (angleFirst != angleSecond)
            return false;
        return kFirst == kSecond;
    }

private:
    const AmoebaInPlaneAngleForce& force;
};
// Construct the kernel, keeping references to the shared AmoebaCudaData and the
// System, and bump the kernel count held by the AmoebaCudaData.
CudaCalcAmoebaInPlaneAngleForceKernel::CudaCalcAmoebaInPlaneAngleForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system)
    : CalcAmoebaInPlaneAngleForceKernel(name, platform),
      data(data),
      system(system)
{
    data.incrementKernelCount();
}
// Destructor: lowers the kernel count held by the shared AmoebaCudaData.
CudaCalcAmoebaInPlaneAngleForceKernel::~CudaCalcAmoebaInPlaneAngleForceKernel()
{
    data.decrementKernelCount();
}
// Copy the in-plane angle parameters of 'force' (converted to float) to the GPU
// context and append a ForceInfo describing the force to the GPU context's
// force list. Also records this kernel in AmoebaCudaData as the local-forces
// kernel. The angle value is passed through unchanged (no unit conversion).
void CudaCalcAmoebaInPlaneAngleForceKernel::initialize(const System& system, const AmoebaInPlaneAngleForce& force) {
    data.setAmoebaLocalForcesKernel(this);

    numAngles = force.getNumAngles();
    std::vector<int>   particle1(numAngles), particle2(numAngles);
    std::vector<int>   particle3(numAngles), particle4(numAngles);
    std::vector<float> angle(numAngles), k(numAngles);

    for (int index = 0; index < numAngles; ++index) {
        double idealAngle, quadraticK;
        force.getAngleParameters(index, particle1[index], particle2[index], particle3[index], particle4[index],
                                 idealAngle, quadraticK);
        angle[index] = static_cast<float>(idealAngle);
        k[index]     = static_cast<float>(quadraticK);
    }

    // Global anharmonic coefficients shared by all in-plane angles.
    const float globalCubic   = static_cast<float>(force.getAmoebaGlobalInPlaneAngleCubic());
    const float globalQuartic = static_cast<float>(force.getAmoebaGlobalInPlaneAngleQuartic());
    const float globalPentic  = static_cast<float>(force.getAmoebaGlobalInPlaneAnglePentic());
    const float globalSextic  = static_cast<float>(force.getAmoebaGlobalInPlaneAngleSextic());

    gpuSetAmoebaInPlaneAngleParameters(data.getAmoebaGpu(), particle1, particle2, particle3, particle4, angle, k,
                                       globalCubic, globalQuartic, globalPentic, globalSextic);
    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
// Launch the combined local-force computation, but only when this kernel is the
// one currently recorded in AmoebaCudaData as the local-forces kernel.
// Always returns 0.0.
double CudaCalcAmoebaInPlaneAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (data.getAmoebaLocalForcesKernel() != this)
        return 0.0;
    computeAmoebaLocalForces(data);
    return 0.0;
}
/* -------------------------------------------------------------------------- *
* AmoebaPiTorsionForce *
* -------------------------------------------------------------------------- */
// CudaForceInfo adapter that exposes the pi-torsions of an AmoebaPiTorsionForce
// as six-particle groups.
class CudaCalcAmoebaPiTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaPiTorsionForce& force) : force(force) {
    }

    // One group per pi-torsion.
    int getNumParticleGroups() {
        return force.getNumPiTorsions();
    }

    // Fill 'particles' with the six particle indices of pi-torsion 'index'.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atomA, atomB, atomC, atomD, atomE, atomF;
        double kValue;
        force.getPiTorsionParameters(index, atomA, atomB, atomC, atomD, atomE, atomF, kValue);
        const int members[] = { atomA, atomB, atomC, atomD, atomE, atomF };
        particles.assign(members, members + 6);
    }

    // Two pi-torsions are identical when their k parameters match; the particle
    // indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int atomA, atomB, atomC, atomD, atomE, atomF;
        double kFirst, kSecond;
        force.getPiTorsionParameters(group1, atomA, atomB, atomC, atomD, atomE, atomF, kFirst);
        force.getPiTorsionParameters(group2, atomA, atomB, atomC, atomD, atomE, atomF, kSecond);
        return kFirst == kSecond;
    }

private:
    const AmoebaPiTorsionForce& force;
};
// Construct the kernel, keeping references to the shared AmoebaCudaData and the
// System, and bump the kernel count held by the AmoebaCudaData.
CudaCalcAmoebaPiTorsionForceKernel::CudaCalcAmoebaPiTorsionForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system)
    : CalcAmoebaPiTorsionForceKernel(name, platform),
      data(data),
      system(system)
{
    data.incrementKernelCount();
}
// Destructor: lowers the kernel count held by the shared AmoebaCudaData.
CudaCalcAmoebaPiTorsionForceKernel::~CudaCalcAmoebaPiTorsionForceKernel()
{
    data.decrementKernelCount();
}
// Copy the pi-torsion parameters of 'force' (k converted to float) to the GPU
// context and append a ForceInfo describing the force to the GPU context's
// force list. Also records this kernel in AmoebaCudaData as the local-forces
// kernel.
void CudaCalcAmoebaPiTorsionForceKernel::initialize(const System& system, const AmoebaPiTorsionForce& force) {
    data.setAmoebaLocalForcesKernel(this);

    numPiTorsions = force.getNumPiTorsions();
    std::vector<int>   particle1(numPiTorsions), particle2(numPiTorsions), particle3(numPiTorsions);
    std::vector<int>   particle4(numPiTorsions), particle5(numPiTorsions), particle6(numPiTorsions);
    std::vector<float> torsionKParameters(numPiTorsions);

    for (int index = 0; index < numPiTorsions; ++index) {
        double kValue;
        force.getPiTorsionParameters(index,
                                     particle1[index], particle2[index], particle3[index],
                                     particle4[index], particle5[index], particle6[index],
                                     kValue);
        torsionKParameters[index] = static_cast<float>(kValue);
    }

    gpuSetAmoebaPiTorsionParameters(data.getAmoebaGpu(), particle1, particle2, particle3, particle4, particle5, particle6, torsionKParameters);
    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
// Launch the combined local-force computation, but only when this kernel is the
// one currently recorded in AmoebaCudaData as the local-forces kernel.
// Always returns 0.0.
double CudaCalcAmoebaPiTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (data.getAmoebaLocalForcesKernel() != this)
        return 0.0;
    computeAmoebaLocalForces(data);
    return 0.0;
}
/* -------------------------------------------------------------------------- *
* AmoebaStretchBend *
* -------------------------------------------------------------------------- */
// CudaForceInfo adapter that exposes the stretch-bend terms of an
// AmoebaStretchBendForce as three-particle groups.
class CudaCalcAmoebaStretchBendForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaStretchBendForce& force) : force(force) {
    }

    // One group per stretch-bend term.
    int getNumParticleGroups() {
        return force.getNumStretchBends();
    }

    // Fill 'particles' with the three particle indices of stretch-bend 'index'.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atomA, atomB, atomC;
        double lengthAB, lengthCB, angleValue, kValue;
        force.getStretchBendParameters(index, atomA, atomB, atomC, lengthAB, lengthCB, angleValue, kValue);
        const int members[] = { atomA, atomB, atomC };
        particles.assign(members, members + 3);
    }

    // Two stretch-bends are identical when both lengths, the angle and k all
    // match; the particle indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int atomA, atomB, atomC;
        double abFirst, cbFirst, angleFirst, kFirst;
        double abSecond, cbSecond, angleSecond, kSecond;
        force.getStretchBendParameters(group1, atomA, atomB, atomC, abFirst, cbFirst, angleFirst, kFirst);
        force.getStretchBendParameters(group2, atomA, atomB, atomC, abSecond, cbSecond, angleSecond, kSecond);
        if (abFirst != abSecond)
            return false;
        if (cbFirst != cbSecond)
            return false;
        if (angleFirst != angleSecond)
            return false;
        return kFirst == kSecond;
    }

private:
    const AmoebaStretchBendForce& force;
};
// Construct the kernel, keeping references to the shared AmoebaCudaData and the
// System, and bump the kernel count held by the AmoebaCudaData.
CudaCalcAmoebaStretchBendForceKernel::CudaCalcAmoebaStretchBendForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system)
    : CalcAmoebaStretchBendForceKernel(name, platform),
      data(data),
      system(system)
{
    data.incrementKernelCount();
}
// Destructor: lowers the kernel count held by the shared AmoebaCudaData.
CudaCalcAmoebaStretchBendForceKernel::~CudaCalcAmoebaStretchBendForceKernel()
{
    data.decrementKernelCount();
}
// Copy the stretch-bend parameters of 'force' (converted to float) to the GPU
// context and append a ForceInfo describing the force to the GPU context's
// force list. Also records this kernel in AmoebaCudaData as the local-forces
// kernel.
void CudaCalcAmoebaStretchBendForceKernel::initialize(const System& system, const AmoebaStretchBendForce& force) {
    data.setAmoebaLocalForcesKernel(this);

    numStretchBends = force.getNumStretchBends();
    std::vector<int>   particle1(numStretchBends), particle2(numStretchBends), particle3(numStretchBends);
    std::vector<float> lengthABParameters(numStretchBends), lengthCBParameters(numStretchBends);
    std::vector<float> angleParameters(numStretchBends), kParameters(numStretchBends);

    for (int index = 0; index < numStretchBends; ++index) {
        double abLength, cbLength, idealAngle, kValue;
        force.getStretchBendParameters(index, particle1[index], particle2[index], particle3[index],
                                       abLength, cbLength, idealAngle, kValue);
        lengthABParameters[index] = static_cast<float>(abLength);
        lengthCBParameters[index] = static_cast<float>(cbLength);
        angleParameters[index]    = static_cast<float>(idealAngle);
        kParameters[index]        = static_cast<float>(kValue);
    }

    gpuSetAmoebaStretchBendParameters(data.getAmoebaGpu(), particle1, particle2, particle3, lengthABParameters, lengthCBParameters, angleParameters, kParameters);
    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
// Launch the combined local-force computation, but only when this kernel is the
// one currently recorded in AmoebaCudaData as the local-forces kernel.
// Always returns 0.0.
double CudaCalcAmoebaStretchBendForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (data.getAmoebaLocalForcesKernel() != this)
        return 0.0;
    computeAmoebaLocalForces(data);
    return 0.0;
}
/* -------------------------------------------------------------------------- *
* AmoebaOutOfPlaneBend *
* -------------------------------------------------------------------------- */
// CudaForceInfo adapter that exposes the out-of-plane bends of an
// AmoebaOutOfPlaneBendForce as four-particle groups.
class CudaCalcAmoebaOutOfPlaneBendForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaOutOfPlaneBendForce& force) : force(force) {
    }

    // One group per out-of-plane bend.
    int getNumParticleGroups() {
        return force.getNumOutOfPlaneBends();
    }

    // Fill 'particles' with the four particle indices of bend 'index'.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atomA, atomB, atomC, atomD;
        double kValue;
        force.getOutOfPlaneBendParameters(index, atomA, atomB, atomC, atomD, kValue);
        const int members[] = { atomA, atomB, atomC, atomD };
        particles.assign(members, members + 4);
    }

    // Two bends are identical when their k parameters match; the particle
    // indices are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int atomA, atomB, atomC, atomD;
        double kFirst, kSecond;
        force.getOutOfPlaneBendParameters(group1, atomA, atomB, atomC, atomD, kFirst);
        force.getOutOfPlaneBendParameters(group2, atomA, atomB, atomC, atomD, kSecond);
        return kFirst == kSecond;
    }

private:
    const AmoebaOutOfPlaneBendForce& force;
};
// Construct the kernel, keeping references to the shared AmoebaCudaData and the
// System, and bump the kernel count held by the AmoebaCudaData.
CudaCalcAmoebaOutOfPlaneBendForceKernel::CudaCalcAmoebaOutOfPlaneBendForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system)
    : CalcAmoebaOutOfPlaneBendForceKernel(name, platform),
      data(data),
      system(system)
{
    data.incrementKernelCount();
}
// Destructor: lowers the kernel count held by the shared AmoebaCudaData.
CudaCalcAmoebaOutOfPlaneBendForceKernel::~CudaCalcAmoebaOutOfPlaneBendForceKernel()
{
    data.decrementKernelCount();
}
// Copy the out-of-plane bend parameters of 'force' (converted to float) to the
// GPU context and append a ForceInfo describing the force to the GPU context's
// force list. Also records this kernel in AmoebaCudaData as the local-forces
// kernel.
void CudaCalcAmoebaOutOfPlaneBendForceKernel::initialize(const System& system, const AmoebaOutOfPlaneBendForce& force) {
    data.setAmoebaLocalForcesKernel(this);

    numOutOfPlaneBends = force.getNumOutOfPlaneBends();
    std::vector<int>   particle1(numOutOfPlaneBends), particle2(numOutOfPlaneBends);
    std::vector<int>   particle3(numOutOfPlaneBends), particle4(numOutOfPlaneBends);
    std::vector<float> kParameters(numOutOfPlaneBends);

    for (int index = 0; index < numOutOfPlaneBends; ++index) {
        double kValue;
        force.getOutOfPlaneBendParameters(index, particle1[index], particle2[index], particle3[index], particle4[index], kValue);
        kParameters[index] = static_cast<float>(kValue);
    }

    // Global anharmonic coefficients shared by all out-of-plane bends.
    const float globalCubic   = static_cast<float>(force.getAmoebaGlobalOutOfPlaneBendCubic());
    const float globalQuartic = static_cast<float>(force.getAmoebaGlobalOutOfPlaneBendQuartic());
    const float globalPentic  = static_cast<float>(force.getAmoebaGlobalOutOfPlaneBendPentic());
    const float globalSextic  = static_cast<float>(force.getAmoebaGlobalOutOfPlaneBendSextic());

    gpuSetAmoebaOutOfPlaneBendParameters(data.getAmoebaGpu(), particle1, particle2, particle3, particle4, kParameters,
                                         globalCubic, globalQuartic, globalPentic, globalSextic);
    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
// Launch the combined local-force computation, but only when this kernel is the
// one currently recorded in AmoebaCudaData as the local-forces kernel.
// Always returns 0.0.
double CudaCalcAmoebaOutOfPlaneBendForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (data.getAmoebaLocalForcesKernel() != this)
        return 0.0;
    computeAmoebaLocalForces(data);
    return 0.0;
}
/* -------------------------------------------------------------------------- *
* AmoebaTorsionTorsion *
* -------------------------------------------------------------------------- */
// CudaForceInfo adapter that exposes the torsion-torsions of an
// AmoebaTorsionTorsionForce as five-particle groups.
class CudaCalcAmoebaTorsionTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaTorsionTorsionForce& force) : force(force) {
    }

    // One group per torsion-torsion.
    int getNumParticleGroups() {
        return force.getNumTorsionTorsions();
    }

    // Fill 'particles' with the five particle indices of torsion-torsion
    // 'index'. The chiral-check atom index and grid index are read but not
    // included in the group.
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        int atomA, atomB, atomC, atomD, atomE, chiralAtom, grid;
        force.getTorsionTorsionParameters(index, atomA, atomB, atomC, atomD, atomE, chiralAtom, grid);
        const int members[] = { atomA, atomB, atomC, atomD, atomE };
        particles.assign(members, members + 5);
    }

    // Two torsion-torsions are identical when they reference the same grid
    // index; particle indices and chiral-check atoms are not compared.
    bool areGroupsIdentical(int group1, int group2) {
        int atomA, atomB, atomC, atomD, atomE;
        int chiralFirst, gridFirst, chiralSecond, gridSecond;
        force.getTorsionTorsionParameters(group1, atomA, atomB, atomC, atomD, atomE, chiralFirst, gridFirst);
        force.getTorsionTorsionParameters(group2, atomA, atomB, atomC, atomD, atomE, chiralSecond, gridSecond);
        return gridFirst == gridSecond;
    }

private:
    const AmoebaTorsionTorsionForce& force;
};
// Construct the kernel, keeping references to the shared AmoebaCudaData and the
// System, and bump the kernel count held by the AmoebaCudaData.
CudaCalcAmoebaTorsionTorsionForceKernel::CudaCalcAmoebaTorsionTorsionForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system)
    : CalcAmoebaTorsionTorsionForceKernel(name, platform),
      data(data),
      system(system)
{
    data.incrementKernelCount();
}
// Destructor: lowers the kernel count held by the shared AmoebaCudaData.
CudaCalcAmoebaTorsionTorsionForceKernel::~CudaCalcAmoebaTorsionTorsionForceKernel()
{
    data.decrementKernelCount();
}
void CudaCalcAmoebaTorsionTorsionForceKernel::initialize(const System& system, const AmoebaTorsionTorsionForce& force) {
data.setAmoebaLocalForcesKernel( this );
numTorsionTorsions = force.getNumTorsionTorsions();
// torsion-torsion parameters
std::vector<int> particle1(numTorsionTorsions);
std::vector<int> particle2(numTorsionTorsions);
std::vector<int> particle3(numTorsionTorsions);
std::vector<int> particle4(numTorsionTorsions);
std::vector<int> particle5(numTorsionTorsions);
std::vector<int> chiralCheckAtomIndex(numTorsionTorsions);
std::vector<int> gridIndices(numTorsionTorsions);
for (int i = 0; i < numTorsionTorsions; i++) {
force.getTorsionTorsionParameters(i, particle1[i], particle2[i], particle3[i],
particle4[i], particle5[i],
chiralCheckAtomIndex[i], gridIndices[i]);
}
gpuSetAmoebaTorsionTorsionParameters(data.getAmoebaGpu(), particle1, particle2, particle3, particle4, particle5, chiralCheckAtomIndex, gridIndices );
// torsion-torsion grids
numTorsionTorsionGrids = force.getNumTorsionTorsionGrids();
std::vector<TorsionTorsionGridFloat> floatGrids;
floatGrids.resize(numTorsionTorsionGrids);
for (int gridIndex = 0; gridIndex < numTorsionTorsionGrids; gridIndex++) {
const TorsionTorsionGrid& grid = force.getTorsionTorsionGrid( gridIndex );
floatGrids[gridIndex].resize( grid.size() );
// check if grid needs to be reordered: x-angle should be 'slow' index
TorsionTorsionGrid reorderedGrid;
int reorder = 0;
if( grid[0][0][0] != grid[0][1][0] ){
AmoebaTorsionTorsionForceImpl::reorderGrid( grid, reorderedGrid );
reorder = 1;
if( data.getLog() ){
(void) fprintf( data.getLog(), "CudaCalcAmoebaTorsionTorsionForceKernel Reordered torsion-torsion grid %4d [%u %u] %12.3f %12.3f [%u %u] %12.3f %12.3f.\n",
gridIndex, static_cast<unsigned int>(grid.size()), static_cast<unsigned int>(grid[0].size()), grid[0][0][0], grid[0][1][0],
static_cast<unsigned int>(reorderedGrid.size() ), static_cast<unsigned int>(reorderedGrid[0].size() ), reorderedGrid[0][0][0], reorderedGrid[0][1][0] );
}
}
for (unsigned int ii = 0; ii < grid.size(); ii++) {
floatGrids[gridIndex][ii].resize( grid[ii].size() );
for (unsigned int jj = 0; jj < grid[ii].size(); jj++) {
floatGrids[gridIndex][ii][jj].resize( grid[ii][jj].size() );
if( reorder ){
for( unsigned int kk = 0; kk < grid[ii][jj].size(); kk++) {
floatGrids[gridIndex][ii][jj][kk] = static_cast<float>(reorderedGrid[ii][jj][kk]);
}
} else {
for( unsigned int kk = 0; kk < grid[ii][jj].size(); kk++) {
floatGrids[gridIndex][ii][jj][kk] = static_cast<float>(grid[ii][jj][kk]);
}
}
}
}
}
gpuSetAmoebaTorsionTorsionGrids(data.getAmoebaGpu(), floatGrids );
data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
// Launch the combined local-force computation, but only when this kernel is the
// one currently recorded in AmoebaCudaData as the local-forces kernel.
// Always returns 0.0.
double CudaCalcAmoebaTorsionTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (data.getAmoebaLocalForcesKernel() != this)
        return 0.0;
    computeAmoebaLocalForces(data);
    return 0.0;
}
/* -------------------------------------------------------------------------- *
* AmoebaMultipole *
* -------------------------------------------------------------------------- */
// Run the AMOEBA multipole force computation on the GPU.
//
// Steps, in order:
//   1. increment the multipole-force counter kept by 'data' and initialize the GPU;
//   2. if Generalized Kirkwood (GK) is active, clear the Born sum and compute
//      Born radii with the Grycuk algorithm;
//   3. compute the multipole forces;
//   4. if GK is active, compute the Kirkwood contribution.
//
// Throws OpenMMException when GK is active but the Grycuk algorithm is not
// selected (no other Born-radii algorithm is implemented here). The exception
// is raised after the counter increment, GPU initialization and Born-sum clear
// have already happened.
static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
    amoebaGpuContext gpu = data.getAmoebaGpu();
    data.incrementMultipoleForceCount();

    // Debug trace; disabled by the constant 0 in the condition.
    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), "In computeAmoebaMultipoleForce hasAmoebaGeneralizedKirkwood=%d\n",
                        data.getHasAmoebaGeneralizedKirkwood() );
        (void) fflush( data.getLog());
    }

    data.initializeGpu();

    // calculate Born radii using the Grycuk algorithm if GK is active
    if( data.getHasAmoebaGeneralizedKirkwood() ){
        kClearBornSum( gpu->gpuContext );
        if( !data.getUseGrycuk() ){
            throw OpenMMException("AmoebaGeneralizedKirkwood: Born radii must be calculated using the Grycuk algorithm." );
        }
        kCalculateAmoebaGrycukBornRadii( gpu );
        kReduceGrycukGbsaBornSum( gpu );
    }

    // multipoles
    kCalculateAmoebaMultipoleForces(gpu, data.getHasAmoebaGeneralizedKirkwood() );

    // GK
    if( data.getHasAmoebaGeneralizedKirkwood() ){
        kCalculateAmoebaKirkwood(gpu);
    }

    // Debug trace; disabled by the constant 0 in the condition.
    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), "completed computeAmoebaMultipoleForce\n" );
        (void) fflush( data.getLog());
    }
}
// Evaluate the electrostatic potential on the GPU at the points in 'inputGrid'
// and store the result in 'outputElectrostaticPotential'.
static void computeAmoebaMultipolePotential( AmoebaCudaData& data, const std::vector< Vec3 >& inputGrid,
                                             std::vector< double >& outputElectrostaticPotential) {
    amoebaGpuContext amoebaGpu = data.getAmoebaGpu();

    // Load the grid to the board and allocate board memory for the potential buffers.
    gpuSetupElectrostaticPotentialCalculation(amoebaGpu, inputGrid);

    // Clear the initialized flag so that initializeGpu() runs again.
    data.setGpuInitialized(false);
    data.initializeGpu();

    // Calculate the potential and load it into the return vector.
    kCalculateAmoebaMultipolePotential(amoebaGpu);
    gpuLoadElectrostaticPotential(amoebaGpu, inputGrid.size(), outputElectrostaticPotential);

    // Deallocate the board memory.
    gpuCleanupElectrostaticPotentialCalculation(amoebaGpu);

    // Debug trace; disabled by the constant 0 in the condition.
    if (0 && data.getLog()) {
        (void) fprintf(data.getLog(), "completed computeAmoebaMultipolePotential\n");
        (void) fflush(data.getLog());
    }
}
/**
 * Compute the multipole moments of the whole system.
 *
 * The GPU-initialized flag is cleared and initialization is run again before
 * the moments are calculated.
 *
 * @param data                    shared AMOEBA CUDA data object
 * @param outputMultipoleMonents  receives the computed moments
 */
static void computeAmoebaSystemMultipoleMoments( AmoebaCudaData& data, std::vector< double >& outputMultipoleMonents) {

    amoebaGpuContext amoebaGpu = data.getAmoebaGpu();

    data.setGpuInitialized( false );
    data.initializeGpu();

    kCalculateAmoebaSystemMultipoleMoments( amoebaGpu, outputMultipoleMonents );
}
/**
 * Particle-equivalence information for an AmoebaMultipoleForce.
 *
 * Two particles are reported identical when their charge, Thole value, damping
 * factor, polarity, axis type, dipole vector and quadrupole vector all compare
 * equal. The three multipole atom indices returned by getMultipoleParameters()
 * are read but take no part in the comparison.
 */
class CudaCalcAmoebaMultipoleForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaMultipoleForce& force) : force(force) {
    }

    /**
     * @param particle1  index of the first particle
     * @param particle2  index of the second particle
     * @return true if the compared parameters of both particles are equal
     */
    bool areParticlesIdentical(int particle1, int particle2) {
        double charge1, charge2, thole1, thole2, damping1, damping2, polarity1, polarity2;
        int axis1, axis2, multipole11, multipole12, multipole21, multipole22, multipole31, multipole32;
        std::vector<double> dipole1, dipole2, quadrupole1, quadrupole2;
        force.getMultipoleParameters(particle1, charge1, dipole1, quadrupole1, axis1, multipole11, multipole21, multipole31, thole1, damping1, polarity1);
        force.getMultipoleParameters(particle2, charge2, dipole2, quadrupole2, axis2, multipole12, multipole22, multipole32, thole2, damping2, polarity2);

        if (charge1 != charge2 || thole1 != thole2 || damping1 != damping2 || polarity1 != polarity2 || axis1 != axis2){
            return false;
        }

        // Whole-vector comparison checks the lengths first, so the second
        // particle's vectors are never indexed past their end and vectors of
        // different length are never treated as equal.
        return dipole1 == dipole2 && quadrupole1 == quadrupole2;
    }
private:
    const AmoebaMultipoleForce& force;
};
/**
 * Construct the kernel: forward name and platform to the base class, bind the
 * data and system references, and increment the kernel count on the data object.
 */
CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system) :
        CalcAmoebaMultipoleForceKernel(name, platform),
        data(data),
        system(system)
{
    data.incrementKernelCount();
}
/**
 * Destroy the kernel and decrement the kernel count on the data object,
 * mirroring the increment done in the constructor.
 */
CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel()
{
    data.decrementKernelCount();
}
/**
 * Initialize the kernel.
 *
 * Reads the per-particle multipole parameters and covalent maps from the force,
 * converts them to single precision, passes them to
 * gpuSetAmoebaMultipoleParameters(), configures PME when the nonbonded method
 * is PME, and registers a ForceInfo object with the GPU context.
 *
 * @param system  the System this kernel will be applied to
 * @param force   the AmoebaMultipoleForce this kernel will be used for
 * @throws OpenMMException if a particle supplies fewer than 3 dipole or 9
 *         quadrupole components, or if the nonbonded method or polarization
 *         type is neither 0 nor 1
 */
void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) {

    numMultipoles = force.getNumMultipoles();
    data.setHasAmoebaMultipole( true );

    // Host-side staging arrays handed to the GPU setup call below.
    std::vector<float> charges(numMultipoles);
    std::vector<float> dipoles(3*numMultipoles);
    std::vector<float> quadrupoles(9*numMultipoles);
    std::vector<float> tholes(numMultipoles);
    std::vector<float> dampingFactors(numMultipoles);
    std::vector<float> polarity(numMultipoles);
    std::vector<int>   axisTypes(numMultipoles);
    std::vector<int>   multipoleAtomZs(numMultipoles);
    std::vector<int>   multipoleAtomXs(numMultipoles);
    std::vector<int>   multipoleAtomYs(numMultipoles);
    std::vector< std::vector< std::vector<int> > > multipoleAtomCovalentInfo(numMultipoles);
    std::vector<int> minCovalentIndices(numMultipoles);
    std::vector<int> minCovalentPolarizationIndices(numMultipoles);

    float scalingDistanceCutoff = 50.0f;

    // Covalent types used to find the index range of ordinary covalent partners.
    std::vector<AmoebaMultipoleForce::CovalentType> covalentList;
    covalentList.push_back( AmoebaMultipoleForce::Covalent12 );
    covalentList.push_back( AmoebaMultipoleForce::Covalent13 );
    covalentList.push_back( AmoebaMultipoleForce::Covalent14 );
    covalentList.push_back( AmoebaMultipoleForce::Covalent15 );

    // Covalent types used to find the index range of polarization partners.
    std::vector<AmoebaMultipoleForce::CovalentType> polarizationCovalentList;
    polarizationCovalentList.push_back( AmoebaMultipoleForce::PolarizationCovalent11 );
    polarizationCovalentList.push_back( AmoebaMultipoleForce::PolarizationCovalent12 );
    polarizationCovalentList.push_back( AmoebaMultipoleForce::PolarizationCovalent13 );
    polarizationCovalentList.push_back( AmoebaMultipoleForce::PolarizationCovalent14 );

    std::vector<int> covalentDegree;
    AmoebaMultipoleForceImpl::getCovalentDegree( force, covalentDegree );

    // Largest (max - min) covalent index span seen over all particles and both lists.
    int maxCovalentRange = 0;

    for (int i = 0; i < numMultipoles; i++) {

        // multipoles

        int axisType, multipoleAtomZ, multipoleAtomX, multipoleAtomY;
        double charge, tholeD, dampingFactorD, polarityD;

        std::vector<double> dipolesD;
        std::vector<double> quadrupolesD;
        force.getMultipoleParameters(i, charge, dipolesD, quadrupolesD, axisType, multipoleAtomZ, multipoleAtomX, multipoleAtomY,
                                     tholeD, dampingFactorD, polarityD );

        // The copies below read 3 dipole and 9 quadrupole entries; fail cleanly
        // instead of indexing past the end of a short vector.
        if( dipolesD.size() < 3 || quadrupolesD.size() < 9 ){
            throw OpenMMException("AmoebaMultipoleForce: dipole and quadrupole vectors must hold at least 3 and 9 components.");
        }

        axisTypes[i]       = axisType;
        multipoleAtomZs[i] = multipoleAtomZ;
        multipoleAtomXs[i] = multipoleAtomX;
        multipoleAtomYs[i] = multipoleAtomY;

        charges[i]         = static_cast<float>(charge);
        tholes[i]          = static_cast<float>(tholeD);
        dampingFactors[i]  = static_cast<float>(dampingFactorD);
        polarity[i]        = static_cast<float>(polarityD);

        // Particle i occupies slots [3*i, 3*i+3) of dipoles and [9*i, 9*i+9) of quadrupoles.
        for (int j = 0; j < 3; j++) {
            dipoles[3*i + j] = static_cast<float>(dipolesD[j]);
        }
        for (int j = 0; j < 9; j++) {
            quadrupoles[9*i + j] = static_cast<float>(quadrupolesD[j]);
        }

        // covalent info

        std::vector< std::vector<int> > covalentLists;
        force.getCovalentMaps(i, covalentLists );
        multipoleAtomCovalentInfo[i] = covalentLists;

        int minCovalentIndex, maxCovalentIndex;
        AmoebaMultipoleForceImpl::getCovalentRange( force, i, covalentList, &minCovalentIndex, &maxCovalentIndex );
        minCovalentIndices[i] = minCovalentIndex;
        if( maxCovalentRange < (maxCovalentIndex - minCovalentIndex) ){
            maxCovalentRange = maxCovalentIndex - minCovalentIndex;
        }

        AmoebaMultipoleForceImpl::getCovalentRange( force, i, polarizationCovalentList, &minCovalentIndex, &maxCovalentIndex );
        minCovalentPolarizationIndices[i] = minCovalentIndex;
        if( maxCovalentRange < (maxCovalentIndex - minCovalentIndex) ){
            maxCovalentRange = maxCovalentIndex - minCovalentIndex;
        }
    }

    int polarizationType = static_cast<int>(force.getPolarizationType());
    int nonbondedMethod  = static_cast<int>(force.getNonbondedMethod());

    // Only the enum values 0 and 1 are accepted for either setting.
    if( nonbondedMethod != 0 && nonbondedMethod != 1 ){
        throw OpenMMException("AmoebaMultipoleForce nonbonded method not recognized.\n");
    }
    if( polarizationType != 0 && polarizationType != 1 ){
        throw OpenMMException("AmoebaMultipoleForce polarization type not recognized.\n");
    }

    gpuSetAmoebaMultipoleParameters(data.getAmoebaGpu(), charges, dipoles, quadrupoles, axisTypes, multipoleAtomZs, multipoleAtomXs, multipoleAtomYs,
                                    tholes, scalingDistanceCutoff, dampingFactors, polarity,
                                    multipoleAtomCovalentInfo, covalentDegree, minCovalentIndices, minCovalentPolarizationIndices, (maxCovalentRange+2),
                                    0, force.getMutualInducedMaxIterations(),
                                    static_cast<float>( force.getMutualInducedTargetEpsilon()),
                                    nonbondedMethod, polarizationType,
                                    static_cast<float>( force.getCutoffDistance()),
                                    static_cast<float>( force.getAEwald()) );

    if (nonbondedMethod == AmoebaMultipoleForce::PME) {

        double alpha = force.getAEwald();
        int xsize, ysize, zsize;

        // Scratch NonbondedForce carrying the tolerance and cutoff for calcPMEParameters().
        NonbondedForce nb;
        nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
        nb.setCutoffDistance(force.getCutoffDistance());

        std::vector<int> pmeGridDimension;
        force.getPmeGridDimensions( pmeGridDimension );

        // An explicit grid is used only if all three dimensions are present, the
        // first is non-zero, and alpha is non-zero; otherwise the parameters are
        // derived from the error tolerance.
        const bool gridSpecified = pmeGridDimension.size() >= 3 && pmeGridDimension[0] != 0;
        const bool pmeParametersSetBasedOnEwaldErrorTolerance = ( !gridSpecified || alpha == 0.0 );
        if( pmeParametersSetBasedOnEwaldErrorTolerance ){
            NonbondedForceImpl::calcPMEParameters(system, nb, alpha, xsize, ysize, zsize);
        } else {
            xsize = pmeGridDimension[0];
            ysize = pmeGridDimension[1];
            zsize = pmeGridDimension[2];
        }

        gpuSetAmoebaPMEParameters(data.getAmoebaGpu(), (float) alpha, xsize, ysize, zsize);

        if( data.getLog() ){
            (void) fprintf( data.getLog(), "AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d]\n",
                            force.getEwaldErrorTolerance(), force.getCutoffDistance(), alpha, xsize, ysize, zsize );
            if( pmeParametersSetBasedOnEwaldErrorTolerance ){
                (void) fprintf( data.getLog(), "Parameters based on error tolerance and OpenMM algorithm.\n" );
            } else {
                // For comparison only: report what calcPMEParameters() would have chosen
                // and the tolerance implied by the user-supplied alpha and cutoff.
                double alphaT;
                int xsizeT, ysizeT, zsizeT;
                NonbondedForceImpl::calcPMEParameters(system, nb, alphaT, xsizeT, ysizeT, zsizeT);
                double impliedTolerance = alpha*force.getCutoffDistance();
                impliedTolerance = 0.5*exp( -(impliedTolerance*impliedTolerance) );
                (void) fprintf( data.getLog(), "Using input parameters implied tolerance=%12.3e;", impliedTolerance );
                (void) fprintf( data.getLog(), "OpenMM param: aEwald=%12.3f [%6d %6d %6d]\n", alphaT, xsizeT, ysizeT, zsizeT);
            }
            (void) fprintf( data.getLog(), "\n" );
            (void) fflush( data.getLog() );
        }

        data.setApplyMultipoleCutoff( 1 );

        // Record the PME method and squared cutoff on the platform data and GPU simulation struct.
        data.cudaPlatformData.nonbondedMethod = PARTICLE_MESH_EWALD;
        amoebaGpuContext amoebaGpu            = data.getAmoebaGpu();
        gpuContext gpu                        = amoebaGpu->gpuContext;
        gpu->sim.nonbondedCutoffSqr           = static_cast<float>(force.getCutoffDistance()*force.getCutoffDistance());
        gpu->sim.nonbondedMethod              = PARTICLE_MESH_EWALD;
    }

    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
/**
 * Execute the kernel by running computeAmoebaMultipoleForce() on the shared data object.
 *
 * @param context        the context in which to execute this kernel (not used here)
 * @param includeForces  not used here
 * @param includeEnergy  not used here
 * @return always 0.0
 */
double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy)
{
    computeAmoebaMultipoleForce( data );
    return 0.0;
}
/**
 * Calculate the electrostatic potential on a grid by delegating to
 * computeAmoebaMultipolePotential().
 *
 * @param context                       the context in which to execute this kernel (not used here)
 * @param inputGrid                     input grid coordinates
 * @param outputElectrostaticPotential  output potential
 */
void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl& context, const std::vector< Vec3 >& inputGrid,
                                                                   std::vector< double >& outputElectrostaticPotential)
{
    computeAmoebaMultipolePotential( data, inputGrid, outputElectrostaticPotential );
}
/**
 * Get the system multipole moments by delegating to
 * computeAmoebaSystemMultipoleMoments().
 *
 * @param context                 the context (not used here)
 * @param outputMultipoleMonents  receives the computed moments
 */
void CudaCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextImpl& context, std::vector< double >& outputMultipoleMonents)
{
    computeAmoebaSystemMultipoleMoments( data, outputMultipoleMonents);
}
/* -------------------------------------------------------------------------- *
* AmoebaGeneralizedKirkwood *
* -------------------------------------------------------------------------- */
/**
 * Particle-equivalence information for an AmoebaGeneralizedKirkwoodForce.
 * Two particles are identical when charge, radius and scale factor all compare equal.
 */
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaGeneralizedKirkwoodForce& force) : force(force) {
    }

    /**
     * @param particle1  index of the first particle
     * @param particle2  index of the second particle
     * @return true if charge, radius and scale factor match
     */
    bool areParticlesIdentical(int particle1, int particle2) {
        double chargeA, radiusA, scaleA;
        double chargeB, radiusB, scaleB;
        force.getParticleParameters(particle1, chargeA, radiusA, scaleA);
        force.getParticleParameters(particle2, chargeB, radiusB, scaleB);

        if (chargeA != chargeB)
            return false;
        if (radiusA != radiusB)
            return false;
        return scaleA == scaleB;
    }
private:
    const AmoebaGeneralizedKirkwoodForce& force;
};
/**
 * Construct the kernel: forward name and platform to the base class, bind the
 * data and system references, and increment the kernel count on the data object.
 */
CudaCalcAmoebaGeneralizedKirkwoodForceKernel::CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system) :
        CalcAmoebaGeneralizedKirkwoodForceKernel(name, platform),
        data(data),
        system(system)
{
    data.incrementKernelCount();
}
/**
 * Destroy the kernel and decrement the kernel count on the data object,
 * mirroring the increment done in the constructor.
 */
CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwoodForceKernel()
{
    data.decrementKernelCount();
}
/**
 * Initialize the kernel.
 *
 * Marks GK as active on the data object, collects per-particle radius, scale
 * factor and charge in single precision, and passes them with the force-wide
 * settings to the Grycuk or OBC setup routine, depending on data.getUseGrycuk().
 * A ForceInfo object is then registered with the GPU context.
 *
 * @param system  the System this kernel will be applied to
 * @param force   the AmoebaGeneralizedKirkwoodForce this kernel will be used for
 */
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& system, const AmoebaGeneralizedKirkwoodForce& force) {

    data.setHasAmoebaGeneralizedKirkwood( true );

    const int particleCount = system.getNumParticles();

    std::vector<float> radius(particleCount);
    std::vector<float> scale(particleCount);
    std::vector<float> charge(particleCount);

    for( int p = 0; p < particleCount; ++p ){
        double chargeD, radiusD, scaleD;
        force.getParticleParameters(p, chargeD, radiusD, scaleD);
        radius[p] = static_cast<float>( radiusD );
        scale[p]  = static_cast<float>( scaleD );
        charge[p] = static_cast<float>( chargeD );
    }

    // Force-wide settings shared by both setup routines.
    const float soluteDielectric  = static_cast<float>( force.getSoluteDielectric() );
    const float solventDielectric = static_cast<float>( force.getSolventDielectric() );
    const float probeRadius       = static_cast<float>( force.getProbeRadius() );
    const float surfaceAreaFactor = static_cast<float>( force.getSurfaceAreaFactor() );

    if( data.getUseGrycuk() ){
        gpuSetAmoebaGrycukParameters( data.getAmoebaGpu(), soluteDielectric, solventDielectric,
                                      radius, scale, charge,
                                      force.getIncludeCavityTerm(),
                                      probeRadius, surfaceAreaFactor );
    } else {
        gpuSetAmoebaObcParameters( data.getAmoebaGpu(), soluteDielectric, solventDielectric,
                                   radius, scale, charge,
                                   force.getIncludeCavityTerm(),
                                   probeRadius, surfaceAreaFactor );
    }

    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
/**
 * Execute the kernel. This method performs no work of its own: the GK
 * calculation is handled in computeAmoebaMultipoleForce().
 *
 * @param context        the context in which to execute this kernel (not used here)
 * @param includeForces  not used here
 * @param includeEnergy  not used here
 * @return always 0.0
 */
double CudaCalcAmoebaGeneralizedKirkwoodForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy)
{
    return 0.0;
}
/**
 * Run the buffered 14-7 van der Waals force calculation.
 *
 * @param data  shared AMOEBA CUDA data object; its getUseVdwNeighborList() flag
 *              is passed through to the force routine
 */
static void computeAmoebaVdwForce( AmoebaCudaData& data ) {

    amoebaGpuContext amoebaGpu = data.getAmoebaGpu();
    data.initializeGpu();

    // Vdw14_7F
    kCalculateAmoebaVdw14_7Forces( amoebaGpu, data.getUseVdwNeighborList() );
}
/* -------------------------------------------------------------------------- *
* AmoebaVdw *
* -------------------------------------------------------------------------- */
/**
 * Particle-equivalence information for an AmoebaVdwForce.
 * Two particles are identical when sigma, epsilon and the reduction factor all
 * compare equal; the integer index returned by getParticleParameters() is read
 * but not compared.
 */
class CudaCalcAmoebaVdwForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaVdwForce& force) : force(force) {
    }

    /**
     * @param particle1  index of the first particle
     * @param particle2  index of the second particle
     * @return true if sigma, epsilon and reduction factor match
     */
    bool areParticlesIdentical(int particle1, int particle2) {
        int ivA, ivB;
        double sigmaA, epsilonA, reductionA;
        double sigmaB, epsilonB, reductionB;
        force.getParticleParameters(particle1, ivA, sigmaA, epsilonA, reductionA);
        force.getParticleParameters(particle2, ivB, sigmaB, epsilonB, reductionB);

        if (sigmaA != sigmaB)
            return false;
        if (epsilonA != epsilonB)
            return false;
        return reductionA == reductionB;
    }
private:
    const AmoebaVdwForce& force;
};
/**
 * Construct the kernel: forward name and platform to the base class, bind the
 * data and system references, and increment the kernel count on the data object.
 */
CudaCalcAmoebaVdwForceKernel::CudaCalcAmoebaVdwForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system) :
        CalcAmoebaVdwForceKernel(name, platform),
        data(data),
        system(system)
{
    data.incrementKernelCount();
}
/**
 * Destroy the kernel and decrement the kernel count on the data object,
 * mirroring the increment done in the constructor.
 */
CudaCalcAmoebaVdwForceKernel::~CudaCalcAmoebaVdwForceKernel()
{
    data.decrementKernelCount();
}
/**
 * Initialize the kernel.
 *
 * Collects the per-particle vdW parameters and exclusion lists, passes them
 * with the combining rules and cutoff settings to gpuSetAmoebaVdwParameters(),
 * registers a ForceInfo object, records whether the neighbor list is used, and
 * stores the dispersion-correction coefficient (0.0 when the correction is off).
 *
 * @param system  the System this kernel will be applied to
 * @param force   the AmoebaVdwForce this kernel will be used for
 */
void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const AmoebaVdwForce& force) {

    // per-particle parameters

    const int particleCount = system.getNumParticles();

    std::vector<int>                indexIVs(particleCount);
    std::vector< std::vector<int> > allExclusions(particleCount);
    std::vector<float>              sigmas(particleCount);
    std::vector<float>              epsilons(particleCount);
    std::vector<float>              reductions(particleCount);

    for( int p = 0; p < particleCount; ++p ){

        int ivIndex;
        double sigmaD, epsilonD, reductionD;
        std::vector<int> particleExclusions;

        force.getParticleParameters( p, ivIndex, sigmaD, epsilonD, reductionD );
        force.getParticleExclusions( p, particleExclusions );

        // Append this particle's exclusions to its (initially empty) slot.
        allExclusions[p].insert( allExclusions[p].end(), particleExclusions.begin(), particleExclusions.end() );

        indexIVs[p]   = ivIndex;
        sigmas[p]     = static_cast<float>( sigmaD );
        epsilons[p]   = static_cast<float>( epsilonD );
        reductions[p] = static_cast<float>( reductionD );
    }

    bool useCutoff = (force.getNonbondedMethod() == AmoebaVdwForce::CutoffPeriodic);

    gpuSetAmoebaVdwParameters( data.getAmoebaGpu(), indexIVs, sigmas, epsilons, reductions,
                               force.getSigmaCombiningRule(), force.getEpsilonCombiningRule(),
                               allExclusions, useCutoff, static_cast<float>(force.getCutoff()) );

    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));

    if( data.getLog() ){
        (void) fprintf( data.getLog(), "CudaCalcAmoebaVdwForceKernel useCutoff=%d\n",
                        useCutoff );
    }

    // The neighbor list is used for every method except NoCutoff.
    data.setUseVdwNeighborList(force.getNonbondedMethod() != AmoebaVdwForce::NoCutoff);

    if (force.getUseDispersionCorrection()) {
        data.dispersionCoefficient = AmoebaVdwForceImpl::calcDispersionCorrection(system, force);
    } else {
        data.dispersionCoefficient = 0.0;
    }
}
/**
 * Execute the kernel: run the vdW force calculation and return the
 * dispersion-correction term.
 *
 * @param context        the context in which to execute this kernel (not used here)
 * @param includeForces  not used here
 * @param includeEnergy  not used here
 * @return the dispersion coefficient divided by the product of the three
 *         periodic box sizes, or 0.0 when the coefficient is zero
 */
double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {

    _gpuContext* gpu = data.cudaPlatformData.gpu;
    computeAmoebaVdwForce( data );

    // No correction stored: skip the division entirely.
    if (data.dispersionCoefficient == 0.0) {
        return 0.0;
    }

    double correction = data.dispersionCoefficient/(gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ);
    return correction;
}
/* -------------------------------------------------------------------------- *
* AmoebaWcaDispersion *
* -------------------------------------------------------------------------- */
/**
 * Run the WCA dispersion force calculation after initializing the GPU.
 *
 * @param data  shared AMOEBA CUDA data object
 */
static void computeAmoebaWcaDispersionForce( AmoebaCudaData& data ) {

    data.initializeGpu();

    // Debug trace; disabled by the constant 0 in the condition.
    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), "Calling computeAmoebaWcaDispersionForce " );
        (void) fflush( data.getLog() );
    }

    kCalculateAmoebaWcaDispersionForces( data.getAmoebaGpu() );

    // Debug trace; disabled by the constant 0 in the condition.
    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), " -- completed\n" );
        (void) fflush( data.getLog() );
    }
}
/**
 * Particle-equivalence information for an AmoebaWcaDispersionForce.
 * Two particles are identical when radius and epsilon both compare equal.
 */
class CudaCalcAmoebaWcaDispersionForceKernel::ForceInfo : public CudaForceInfo {
public:
    ForceInfo(const AmoebaWcaDispersionForce& force) : force(force) {
    }

    /**
     * @param particle1  index of the first particle
     * @param particle2  index of the second particle
     * @return true if radius and epsilon match
     */
    bool areParticlesIdentical(int particle1, int particle2) {
        double radiusA, epsilonA;
        double radiusB, epsilonB;
        force.getParticleParameters(particle1, radiusA, epsilonA);
        force.getParticleParameters(particle2, radiusB, epsilonB);

        if (radiusA != radiusB)
            return false;
        return epsilonA == epsilonB;
    }
private:
    const AmoebaWcaDispersionForce& force;
};
/**
 * Construct the kernel: forward name and platform to the base class, bind the
 * data and system references, and increment the kernel count on the data object.
 */
CudaCalcAmoebaWcaDispersionForceKernel::CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system) :
        CalcAmoebaWcaDispersionForceKernel(name, platform),
        data(data),
        system(system)
{
    data.incrementKernelCount();
}
/**
 * Destroy the kernel and decrement the kernel count on the data object,
 * mirroring the increment done in the constructor.
 */
CudaCalcAmoebaWcaDispersionForceKernel::~CudaCalcAmoebaWcaDispersionForceKernel()
{
    data.decrementKernelCount();
}
/**
 * Initialize the kernel.
 *
 * Collects per-particle radius and epsilon in single precision, obtains the
 * total maximum dispersion energy, and passes these with the force-wide WCA
 * settings to gpuSetAmoebaWcaDispersionParameters(). A ForceInfo object is then
 * registered with the GPU context.
 *
 * @param system  the System this kernel will be applied to
 * @param force   the AmoebaWcaDispersionForce this kernel will be used for
 */
void CudaCalcAmoebaWcaDispersionForceKernel::initialize(const System& system, const AmoebaWcaDispersionForce& force) {

    // per-particle parameters

    const int particleCount = system.getNumParticles();

    std::vector<float> radii(particleCount);
    std::vector<float> epsilons(particleCount);

    for( int p = 0; p < particleCount; ++p ){
        double radiusD, epsilonD;
        force.getParticleParameters( p, radiusD, epsilonD );
        radii[p]    = static_cast<float>( radiusD );
        epsilons[p] = static_cast<float>( epsilonD );
    }

    float totalMaximumDispersionEnergy = static_cast<float>( AmoebaWcaDispersionForceImpl::getTotalMaximumDispersionEnergy( force ) );

    // Force-wide settings, converted to single precision.
    const float epso    = static_cast<float>( force.getEpso( ) );
    const float epsh    = static_cast<float>( force.getEpsh( ) );
    const float rmino   = static_cast<float>( force.getRmino( ) );
    const float rminh   = static_cast<float>( force.getRminh( ) );
    const float awater  = static_cast<float>( force.getAwater( ) );
    const float shctd   = static_cast<float>( force.getShctd( ) );
    const float dispoff = static_cast<float>( force.getDispoff( ) );

    gpuSetAmoebaWcaDispersionParameters( data.getAmoebaGpu(), radii, epsilons, totalMaximumDispersionEnergy,
                                         epso, epsh, rmino, rminh, awater, shctd, dispoff );

    data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
/**
 * Execute the kernel by running computeAmoebaWcaDispersionForce() on the shared data object.
 *
 * @param context        the context in which to execute this kernel (not used here)
 * @param includeForces  not used here
 * @param includeEnergy  not used here
 * @return always 0.0
 */
double CudaCalcAmoebaWcaDispersionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy)
{
    computeAmoebaWcaDispersionForce( data );
    return 0.0;
}
#ifndef AMOEBA_OPENMM_CUDAKERNELS_H_
#define AMOEBA_OPENMM_CUDAKERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/amoebaKernels.h"
#include "CudaKernels.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
#include "AmoebaCudaData.h"
namespace OpenMM {
/**
* This kernel is invoked by AmoebaBondForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaBondForceKernel : public CalcAmoebaBondForceKernel {
public:
/**
* Construct the kernel.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object (the class holds a reference of this type)
* @param system    the System object (the class holds a reference of this type)
*/
CudaCalcAmoebaBondForceKernel(std::string name,
const Platform& platform,
AmoebaCudaData& data,
System& system);
/**
* Destructor.
*/
~CudaCalcAmoebaBondForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaBondForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaBondForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined outside this header.
class ForceInfo;
// NOTE(review): presumably the number of bonds in the force; assigned outside this header - confirm.
int numBonds;
// Reference members; being references, they must be bound in the constructor.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaAngleForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaAngleForceKernel : public CalcAmoebaAngleForceKernel {
public:
/**
* Construct the kernel.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object (the class holds a reference of this type)
* @param system    the System object (the class holds a reference of this type)
*/
CudaCalcAmoebaAngleForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor.
*/
~CudaCalcAmoebaAngleForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaAngleForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaAngleForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined outside this header.
class ForceInfo;
// NOTE(review): presumably the number of angles in the force; assigned outside this header - confirm.
int numAngles;
// Reference members; being references, they must be bound in the constructor.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaInPlaneAngleForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaInPlaneAngleForceKernel : public CalcAmoebaInPlaneAngleForceKernel {
public:
/**
* Construct the kernel.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object (the class holds a reference of this type)
* @param system    the System object (the class holds a reference of this type)
*/
CudaCalcAmoebaInPlaneAngleForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor.
*/
~CudaCalcAmoebaInPlaneAngleForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaInPlaneAngleForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaInPlaneAngleForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined outside this header.
class ForceInfo;
// NOTE(review): presumably the number of in-plane angles in the force; assigned outside this header - confirm.
int numAngles;
// Reference members; being references, they must be bound in the constructor.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaPiTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaPiTorsionForceKernel : public CalcAmoebaPiTorsionForceKernel {
public:
/**
* Construct the kernel.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object (the class holds a reference of this type)
* @param system    the System object (the class holds a reference of this type)
*/
CudaCalcAmoebaPiTorsionForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor.
*/
~CudaCalcAmoebaPiTorsionForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaPiTorsionForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaPiTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined outside this header.
class ForceInfo;
// NOTE(review): presumably the number of pi-torsions in the force; assigned outside this header - confirm.
int numPiTorsions;
// Reference members; being references, they must be bound in the constructor.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaStretchBendForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaStretchBendForceKernel : public CalcAmoebaStretchBendForceKernel {
public:
/**
* Construct the kernel.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object (the class holds a reference of this type)
* @param system    the System object (the class holds a reference of this type)
*/
CudaCalcAmoebaStretchBendForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor.
*/
~CudaCalcAmoebaStretchBendForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaStretchBendForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaStretchBendForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined outside this header.
class ForceInfo;
// NOTE(review): presumably the number of stretch-bend terms in the force; assigned outside this header - confirm.
int numStretchBends;
// Reference members; being references, they must be bound in the constructor.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaOutOfPlaneBendForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaOutOfPlaneBendForceKernel : public CalcAmoebaOutOfPlaneBendForceKernel {
public:
/**
* Construct the kernel.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object (the class holds a reference of this type)
* @param system    the System object (the class holds a reference of this type)
*/
CudaCalcAmoebaOutOfPlaneBendForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor.
*/
~CudaCalcAmoebaOutOfPlaneBendForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaOutOfPlaneBendForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaOutOfPlaneBendForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined outside this header.
class ForceInfo;
// NOTE(review): presumably the number of out-of-plane bends in the force; assigned outside this header - confirm.
int numOutOfPlaneBends;
// Reference members; being references, they must be bound in the constructor.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaTorsionTorsionForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaTorsionTorsionForceKernel : public CalcAmoebaTorsionTorsionForceKernel {
public:
/**
* Construct the kernel.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object (the class holds a reference of this type)
* @param system    the System object (the class holds a reference of this type)
*/
CudaCalcAmoebaTorsionTorsionForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor.
*/
~CudaCalcAmoebaTorsionTorsionForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaTorsionTorsionForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaTorsionTorsionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* The implementation in the accompanying source file runs the local-force
* computation only when this kernel is the one returned by
* data.getAmoebaLocalForcesKernel(), and returns 0.0 in every case.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined outside this header.
class ForceInfo;
// NOTE(review): presumably the number of torsion-torsion terms and of torsion-torsion grids;
// both are assigned outside this header - confirm.
int numTorsionTorsions;
int numTorsionTorsionGrids;
// Reference members; being references, they must be bound in the constructor.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaMultipoleForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaMultipoleForceKernel : public CalcAmoebaMultipoleForceKernel {
public:
/**
* Construct the kernel. The implementation forwards name and platform to the
* base class, binds the data and system references, and increments the kernel
* count on the data object.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object, held by reference
* @param system    the System object, held by reference
*/
CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor. The implementation decrements the kernel count on the data object.
*/
~CudaCalcAmoebaMultipoleForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaMultipoleForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaMultipoleForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* The implementation in the accompanying source file runs
* computeAmoebaMultipoleForce() and returns 0.0.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Execute the kernel to calculate the electrostatic potential
*
* @param context the context in which to execute this kernel
* @param inputGrid input grid coordinates
* @param outputElectrostaticPotential output potential
*/
void getElectrostaticPotential(ContextImpl& context, const std::vector< Vec3 >& inputGrid,
std::vector< double >& outputElectrostaticPotential );
/**
* Get the system multipole moments
*
* @param context context
* @param outputMultipoleMonents (charge,
dipole_x, dipole_y, dipole_z,
quadrupole_xx, quadrupole_xy, quadrupole_xz,
quadrupole_yx, quadrupole_yy, quadrupole_yz,
quadrupole_zx, quadrupole_zy, quadrupole_zz )
*/
void getSystemMultipoleMoments( ContextImpl& context, std::vector< double >& outputMultipoleMonents );
private:
// Nested helper class; only declared here, defined in the implementation file.
class ForceInfo;
// Number of multipoles; set from force.getNumMultipoles() in initialize().
int numMultipoles;
// References bound in the constructor to the shared data object and the System.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked by AmoebaGeneralizedKirkwoodForce to calculate the forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaGeneralizedKirkwoodForceKernel : public CalcAmoebaGeneralizedKirkwoodForceKernel {
public:
/**
* Construct the kernel. The implementation forwards name and platform to the
* base class, binds the data and system references, and increments the kernel
* count on the data object.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object, held by reference
* @param system    the System object, held by reference
*/
CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor. The implementation decrements the kernel count on the data object.
*/
~CudaCalcAmoebaGeneralizedKirkwoodForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaGeneralizedKirkwoodForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaGeneralizedKirkwoodForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* The implementation in the accompanying source file does no work and returns
* 0.0; the GK calculation is handled in computeAmoebaMultipoleForce().
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined in the implementation file.
class ForceInfo;
// References bound in the constructor to the shared data object and the System.
AmoebaCudaData& data;
System& system;
};
/**
* This kernel is invoked to calculate the vdw forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaVdwForceKernel : public CalcAmoebaVdwForceKernel {
public:
/**
* Construct the kernel. The implementation forwards name and platform to the
* base class, binds the data and system references, and increments the kernel
* count on the data object.
*
* @param name      the kernel name
* @param platform  the Platform object
* @param data      the AmoebaCudaData object, held by reference
* @param system    the System object, held by reference
*/
CudaCalcAmoebaVdwForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
/**
* Destructor. The implementation decrements the kernel count on the data object.
*/
~CudaCalcAmoebaVdwForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaVdwForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaVdwForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* The implementation in the accompanying source file returns the stored
* dispersion coefficient divided by the product of the periodic box sizes,
* or 0.0 when that coefficient is zero.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper class; only declared here, defined in the implementation file.
class ForceInfo;
// References bound in the constructor to the shared data object and the System.
AmoebaCudaData& data;
System& system;
};
/**
* CUDA implementation of CalcAmoebaWcaDispersionForceKernel: invoked to calculate the WCA dispersion
* forces acting on the system and the energy of the system.
*/
class CudaCalcAmoebaWcaDispersionForceKernel : public CalcAmoebaWcaDispersionForceKernel {
public:
/**
* Create the kernel.
*
* @param name the name of the kernel
* @param platform the Platform this kernel is created for
* @param data the AmoebaCudaData this kernel operates on
* @param system the System this kernel will be applied to
*/
CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, AmoebaCudaData& data, System& system);
~CudaCalcAmoebaWcaDispersionForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the AmoebaWcaDispersionForce this kernel will be used for
*/
void initialize(const System& system, const AmoebaWcaDispersionForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
// Nested helper type; only forward-declared here.
class ForceInfo;
// Reference members: the referenced objects must outlive this kernel.
AmoebaCudaData& data;
System& system;
};
} // namespace OpenMM
#endif /*AMOEBA_OPENMM_CUDAKERNELS_H*/
This source diff could not be displayed because it is too large. You can view the blob instead.
#ifndef __AMOEBA_GPU_TYPES_H__
#define __AMOEBA_GPU_TYPES_H__
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "openmm/Vec3.h"
#include <string>
#include <vector>
// Convenience aliases for nested string containers (used by readFile() for parsed file contents).
typedef std::vector<std::string> StringVector;
typedef std::vector<StringVector> StringVectorVector;
// Single-precision math shorthands; they resolve to the C float routines.
#define SQRT sqrtf
#define EXP expf
// Dot product of two 3-component indexable operands (arrays, pointers, vectors, ...).
// Every parameter is parenthesized before it is subscripted so that expression arguments
// such as DOT3(p + 1, q) index the intended operand instead of expanding to p + 1[0].
// Each argument is still evaluated three times: do not pass expressions with side effects.
#define DOT3(u,v) (((u)[0])*((v)[0]) + ((u)[1])*((v)[1]) + ((u)[2])*((v)[2]))
// Row-major style table of doubles: one inner vector per entry.
typedef std::vector<std::vector<double> > VectorOfDoubleVectors;
// Host-side entry points, grouped by force family. The naming pattern visible below is a
// Set...Sim / Get...Sim pair plus one or more compute routines per family.
// NOTE(review): the Set/Get pairs presumably upload/download the simulation constants used by
// that family's kernels -- confirm against the corresponding .cu files.

// local (bond) forces
extern void SetCalculateAmoebaLocalForcesSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaLocalForcesSim(amoebaGpuContext gpu);
extern void kCalculateAmoebaLocalForces(amoebaGpuContext gpu);
// multipole forces
extern void SetCalculateAmoebaMultipoleForcesSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaMultipoleForcesSim(amoebaGpuContext gpu);
// performGk / hasAmoebaGeneralizedKirkwood: NOTE(review): presumably select the generalized Kirkwood path -- confirm.
extern void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool performGk );
extern void kSetupAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaGeneralizedKirkwood );
// multipole potential
extern void SetCalculateAmoebaMultipolePotentialSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaMultipolePotentialSim(amoebaGpuContext gpu);
extern void kCalculateAmoebaMultipolePotential(amoebaGpuContext amoebaGpu );
// system multipole moments; results are returned through the vector passed by reference
extern void kCalculateAmoebaSystemMultipoleMoments(amoebaGpuContext amoebaGpu, std::vector< double >& outputMultipoleMonents );
// vdw
extern void SetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext gpu);
extern void kCalculateAmoebaVdw14_7Forces(amoebaGpuContext amoebaGpu, int applyCutoff );
// wca dispersion
extern void SetCalculateAmoebaCudaWcaDispersionSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaWcaDispersionSim(amoebaGpuContext gpu);
extern void kCalculateAmoebaWcaDispersionForces(amoebaGpuContext amoebaGpu );
// Electric-field entry points. Each variant (no cutoff, PME, with Gk) has its own
// Set...Sim / Get...Sim pair and compute routine.

// fixed electric field -- no cutoff
extern void SetCalculateAmoebaCudaFixedEFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaFixedEFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaFixedEField( amoebaGpuContext gpu);
// fixed electric field -- PME
extern void SetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext gpu);
// fixed electric field and Gk
extern void SetCalculateAmoebaCudaFixedEAndGKFieldsSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaFixedEAndGKFieldsSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext gpu);
// mutual induced -- no cutoff
extern void SetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaMutualInducedFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaMutualInducedField( amoebaGpuContext gpu);
// mutual induced -- PME
extern void SetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaPmeMutualInducedField( amoebaGpuContext gpu);
// mutual induced and Gk
extern void SetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGpu);
extern void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGpu);
extern void cudaComputeAmoebaMutualInducedAndGkField( amoebaGpuContext gpu);
// lab frame moments
// NOTE(review): presumably rotates the molecular-frame multipoles into the lab frame -- confirm.
extern void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu );
// debugging output: writes a float4 array and a float array to the named file
extern void cudaWriteFloat4AndFloat1ArraysToFile( int numberOfAtoms, const std::string& fname, int timestep, int entriesPerAtom1, CUDAStream<float4>* array1,
int entriesPerAtom2, CUDAStream<float>* array2 );
// electrostatics -- no cutoff
extern void SetCalculateAmoebaElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueToForce );
extern void cudaComputeAmoebaElectrostaticPotential( amoebaGpuContext amoebaGpu );
// electrostatics -- PME direct space
extern void SetCalculateAmoebaPmeDirectElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaPmeDirectElectrostaticSim( amoebaGpuContext amoebaGpu );
extern void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu );
// torque mapping
extern void SetCalculateAmoebaCudaMapTorquesSim(amoebaGpuContext gpu);
extern void GetCalculateAmoebaCudaMapTorquesSim(amoebaGpuContext gpu);
extern void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext gpu, CUDAStream<float>* psTorque );
// Kirkwood
extern void SetCalculateAmoebaKirkwoodSim( amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaKirkwoodSim( amoebaGpuContext amoebaGpu );
//extern void cudaComputeAmoebaKirkwood( amoebaGpuContext amoebaGpu );
extern void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu );
// Kirkwood EDiff
extern void SetCalculateAmoebaKirkwoodEDiffSim( amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaKirkwoodEDiffSim( amoebaGpuContext amoebaGpu );
//extern void cudaComputeAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu );
extern void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu );
//extern void SetCalculateAmoebaObcGbsaBornSumSim( gpuContext gpu );
//extern void GetCalculateAmoebaObcGbsaBornSumSim( gpuContext gpu );
//extern void cudaComputeAmoebaBornRadii( amoebaGpuContext amoebaGpu );
// Grycuk Born radii and GBSA forces
extern void kCalculateAmoebaGrycukBornRadii( amoebaGpuContext amoebaGpu );
extern void kReduceGrycukGbsaBornSum( amoebaGpuContext gpu );
extern void SetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu );
extern void kCalculateGrycukGbsaForces2( amoebaGpuContext amoebaGpu );
// OBC -- Part 1 (declarations disabled)
//extern void SetCalculateObcGbsaForces1Sim(gpuContext gpu);
//extern void GetCalculateObcGbsaForces1Sim(gpuContext gpu);
//extern void kCalculateObcGbsaForces1(gpuContext gpu);
// OBC -- Part 2
extern void SetCalculateAmoebaObcGbsaForces2Sim(amoebaGpuContext amoebaGpu);
extern void GetCalculateAmoebaObcGbsaForces2Sim(amoebaGpuContext amoebaGpu);
extern void kCalculateAmoebaObcGbsaForces2( amoebaGpuContext amoebaGpu );
// Utility routines: reductions, diagnostics, file I/O, and array clearing/copying helpers.
// NOTE(review): the semantics below are inferred from names and signatures only -- confirm
// against the implementations before relying on them.

// reductions over arrays
extern void cudaReduceN2ToN( float *N2Array, int N, float *NArray, int includeDiagonal, int offset );
extern float cudaGetSum( int numberOfElements, CUDAStream<float>* array );
extern float cudaGetNorm2( int numberOfElements, CUDAStream<float>* array );
// diagnostics and file output
extern int checkForNansAndInfinities( int numberOfElements, CUDAStream<float>* array );
extern void cudaWriteFloat1AndFloat1ArraysToFile( int numberOfAtoms, const std::string& fname, std::vector<int>& fileId, int entriesPerAtom1, CUDAStream<float>* array1,
int entriesPerAtom2, CUDAStream<float>* array2 );
// fileName is passed by value; contents are returned through fileContents
extern void readFile( std::string fileName, StringVectorVector& fileContents );
// copy CUDAStream contents into a VectorOfDoubleVectors (one overload-like routine per element type)
extern void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, VectorOfDoubleVectors& outputVector, int* order, float conversion );
extern void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float2>* array, VectorOfDoubleVectors& outputVector, int* order, float conversion );
extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector, int* order, float conversion );
extern void cudaWriteVectorOfDoubleVectorsToFile( const std::string& fname, std::vector<int>& fileId, VectorOfDoubleVectors& outputVector );
extern void initializeCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, float initValue );
// NaN checks that report to the given log stream
extern void checkForNans( int numberOfParticles, int entriesPerParticle,
CUDAStream<float>* array, int* order, int iteration, std::string idString, FILE* log );
extern void checkForNansFloat4( int numberOfParticles, CUDAStream<float4>* array, int* order, int iteration, std::string idString, FILE* log );
// clearing of work arrays and fields
extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float>* fieldToClear );
extern void kClearFloat4( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float4>* fieldToClear );
extern void kClearFields_1( amoebaGpuContext amoebaGpu );
extern void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear );
// note: takes the base gpuContext rather than amoebaGpuContext
extern void kClearBornSum(gpuContext gpu);
// launch configuration helper
extern unsigned int getThreadsPerBlock( amoebaGpuContext amoebaGpu, unsigned int sharedMemoryPerThread, unsigned int sharedMemoryPerBlock );
//extern int isNanOrInfinity( double number );
extern void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration);
// CUDAStream zeroing and reduce-and-copy helpers
extern void zeroCUDAStreamFloat4( CUDAStream<float4>* streamToCopy );
extern void reduceAndCopyCUDAStreamFloat4( CUDAStream<float4>* streamToCopy, CUDAStream<float>* outputStream, float conversion );
extern void reduceAndCopyCUDAStreamFloat( CUDAStream<float>* streamToCopy, CUDAStream<float>* outputStream, float conversion );
// PME (only a Set...Sim routine is declared for this family; there is no matching Get...Sim here)
extern void SetCalculateAmoebaPMESim( amoebaGpuContext amoebaGpu );
extern void kCalculateAmoebaPMEFixedMultipoles(amoebaGpuContext amoebaGpu);
extern void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu);
extern void kCalculateAmoebaPMEInducedDipoleForces(amoebaGpuContext amoebaGpu);
// shared utilities
extern void SetCalculateAmoebaCudaUtilitiesSim( amoebaGpuContext amoebaGpu );
// Timing helper. NOTE(review): units and epoch of the returned double are not visible here -- confirm.
double getTimeOfDay( void );
#endif //__AMOEBA_GPU_TYPES_H__
#ifndef AMOEBA_CUDATYPES_H
#define AMOEBA_CUDATYPES_H
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <kernels/cudatypes.h>
#include <stdarg.h>
#include <limits>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cufft.h>
#include <builtin_types.h>
#include <vector_functions.h>
// Selects how nonbonded interactions are treated: without a cutoff, or with
// particle mesh Ewald. The numeric values are spelled out so they are visible at a glance.
enum CudaAmoebaNonbondedMethod {
    AMOEBA_NO_CUTOFF           = 0,
    AMOEBA_PARTICLE_MESH_EWALD = 1
};
// Capacity of the fixed-size per-grid arrays (amoebaTorTorGrid*) in cudaAmoebaGmxSimulation.
static const int AMOEBA_MAX_TORSION_TORSION_GRIDS = 12;
// NOTE(review): presumably the B-spline interpolation order used by the Amoeba PME code -- confirm.
static const int AMOEBA_PME_ORDER = 5;
// Plain aggregate of scalar parameters and raw pointers describing the Amoeba force field terms.
// NOTE(review): the p* pointers presumably refer to device memory owned by the matching CUDAStream
// members of _amoebaGpuContext, and the field order is presumably relied upon by device code that
// receives a copy of this struct -- confirm before reordering or resizing anything.
struct cudaAmoebaGmxSimulation {
// Constants
// --- harmonic bonds ---
unsigned int amoebaBonds; // Number of bonds
int4* pAmoebaBondID; // Bond atom and output buffer IDs
float2* pAmoebaBondParameter; // Bond parameters
float amoebaBondCubicParameter; // cubic bond parameter
float amoebaBondQuarticicParameter; // quartic bond parameter
unsigned int amoebaBond_offset; // Offset to end of bonds
// --- bond angles ---
unsigned int amoebaAngles; // Number of bond angles
int4* pAmoebaAngleID1; // Bond angle atom and first output buffer IDs
int2* pAmoebaAngleID2; // Bond angle output buffer IDs
float2* pAmoebaAngleParameter; // Bond angle parameters
unsigned int amoebaAngle_offset; // Offset to end of bond angles
float amoebaAngleCubicK; // cubic factor
float amoebaAngleQuarticK; // quartic factor
float amoebaAnglePenticK; // pentic factor
float amoebaAngleSexticK; // sextic factor
// --- in-plane angles ---
unsigned int amoebaInPlaneAngles; // Number of in-plane angles
int4* pAmoebaInPlaneAngleID1; // In-plane angle atom and first output buffer IDs
int4* pAmoebaInPlaneAngleID2; // In-plane angle output buffer IDs
float2* pAmoebaInPlaneAngleParameter; // In-plane angle parameters
unsigned int amoebaInPlaneAngle_offset; // Offset to end of in-plane angles
float amoebaInPlaneAngleCubicK; // cubic factor
float amoebaInPlaneAngleQuarticK; // quartic factor
float amoebaInPlaneAnglePenticK; // pentic factor
float amoebaInPlaneAngleSexticK; // sextic factor
// --- torsions ---
unsigned int amoebaTorsions; // Number of torsions
int4* pAmoebaTorsionID1; // Torsion atom and first output buffer IDs
int4* pAmoebaTorsionID2; // Torsion output buffer IDs
float4* pAmoebaTorsionParameter1; // Torsion parameters
float2* pAmoebaTorsionParameter2; // Torsion parameters
unsigned int amoebaTorsion_offset; // Offset to end of torsions
// --- pi-torsions ---
unsigned int amoebaPiTorsions; // Number of pi-torsions
int4* pAmoebaPiTorsionID1; // PiTorsion atom and first output buffer IDs
int4* pAmoebaPiTorsionID2; // PiTorsion output buffer IDs
int4* pAmoebaPiTorsionID3; // PiTorsion output buffer IDs
float* pAmoebaPiTorsionParameter; // PiTorsion parameters
unsigned int amoebaPiTorsion_offset; // Offset to end of pi-torsions
// --- stretch bends ---
unsigned int amoebaStretchBends; // Number of stretch bends
int4* pAmoebaStretchBendID1; // stretch bend atoms and first output buffer IDs
int2* pAmoebaStretchBendID2; // stretch bend output buffer IDs
float4* pAmoebaStretchBendParameter; // stretch bend parameters
unsigned int amoebaStretchBend_offset; // Offset to end of stretch bends
// --- out-of-plane bends ---
unsigned int amoebaOutOfPlaneBends; // Number of out-of-plane bends
int4* pAmoebaOutOfPlaneBendID1; // out-of-plane bend atoms and first output buffer IDs
int4* pAmoebaOutOfPlaneBendID2; // out-of-plane bend output buffer IDs
float* pAmoebaOutOfPlaneBendParameter; // out-of-plane bend parameters
unsigned int amoebaOutOfPlaneBend_offset; // Offset to end of out-of-plane bends
float amoebaOutOfPlaneBendCubicK; // cubic factor
float amoebaOutOfPlaneBendQuarticK; // quartic factor
float amoebaOutOfPlaneBendPenticK; // pentic factor
float amoebaOutOfPlaneBendSexticK; // sextic factor
// --- torsion torsions ---
unsigned int amoebaTorsionTorsions; // Number of torsion torsions
int4* pAmoebaTorsionTorsionID1; // torsion torsion atoms and first output buffer IDs
int4* pAmoebaTorsionTorsionID2; // torsion torsion output buffer IDs
int4* pAmoebaTorsionTorsionID3; // further torsion torsion indices (NOTE(review): presumably chiral atom / grid index -- confirm)
unsigned int amoebaTorsionTorsion_offset; // Offset to end of torsion torsions
// grids: one entry per torsion-torsion grid, at most AMOEBA_MAX_TORSION_TORSION_GRIDS of them
int amoebaTorTorGridOffset[AMOEBA_MAX_TORSION_TORSION_GRIDS]; // grid offset
int amoebaTorTorGridNy[AMOEBA_MAX_TORSION_TORSION_GRIDS]; // e.g. 25
float amoebaTorTorGridBegin[AMOEBA_MAX_TORSION_TORSION_GRIDS]; // e.g. -180.0
float amoebaTorTorGridDelta[AMOEBA_MAX_TORSION_TORSION_GRIDS]; // e.g. 15.0
float4* pAmoebaTorsionTorsionGrids; // torsion torsion grids
// --- Urey-Bradley ---
unsigned int amoebaUreyBradleys; // Number of UB ixns
int4* pAmoebaUreyBradleyID; // UreyBradley atom and output buffer IDs
float2* pAmoebaUreyBradleyParameter; // UreyBradley parameters
float amoebaUreyBradleyCubicParameter;// cubic parameter
float amoebaUreyBradleyQuarticicParameter; // quartic parameter
unsigned int amoebaUreyBradley_offset; // Offset to end of Urey-Bradley terms
// --- multipoles / polarization ---
float sqrtPi; // sqrt(PI)
float scalingDistanceCutoff; // scaling cutoff
float2* pDampingFactorAndThole; // Thole & damping factors
int polarizationType; // polarization type (0=Mutual, 1=Direct)
int4* pMultipoleParticlesIdsAndAxisType;
int4* pMultipoleParticlesTorqueBufferIndices;
int maxTorqueBufferIndex;
float4* pTorqueMapForce4;
float* pMolecularDipole;
float* pMolecularQuadrupole;
// --- electrostatic potential grid ---
unsigned int paddedPotentialGridSize;
unsigned int potentialGridSize;
unsigned int* pPotentialWorkUnit;
unsigned int potentialWorkUnits;
float4* pPotentialGrid;
float* pPotential;
// --- lab frame multipoles, induced dipoles, torque ---
float* pLabFrameDipole;
float* pLabFrameQuadrupole;
float* pInducedDipole;
float* pInducedDipolePolar;
float* pInducedDipoleS;
float* pInducedDipolePolarS;
float* pTorque;
// --- work arrays ---
float* pWorkArray_3_1;
float* pWorkArray_3_2;
float* pWorkArray_1_1;
float* pWorkArray_1_2;
// --- vdw ---
int vdwUsePBC;
float vdwCutoff;
float vdwCutoff2;
float vdwTaperCutoff;
float vdwTaperCutoff2;
float vdwTaperDelta;
// The macro is visible to everything that follows in the translation unit, not just this struct.
#define VDW_TAPER_TABLE_SIZE 100
float vdwTaperTable[VDW_TAPER_TABLE_SIZE+1]; // VDW_TAPER_TABLE_SIZE+1 entries
float vdw_dTaperTable[VDW_TAPER_TABLE_SIZE+1]; // VDW_TAPER_TABLE_SIZE+1 entries
unsigned int amoebaVdwNonReductions;
int* pAmoebaVdwNonReductionID;
unsigned int* pVdwWorkUnit;
unsigned int amoebaVdwReductions;
int4* pAmoebaVdwReductionID;
float* pAmoebaVdwReduction;
int* pVdwExclusionIndicesIndex;
int* pVdwExclusionIndices;
// WCA constants
float epso;
float epsh;
float rmino;
float rminh;
float awater;
float shctd;
float dispoff;
float totalMaxWcaDispersionEnergy;
float2* pWcaDispersionRadiusEpsilon;
// scaling indices
int* pScaleIndicesIndex;
int* pD_ScaleIndices;
int2* pP_ScaleIndices;
int2* pM_ScaleIndices;
// Kirkwood constants; the trailing comments give the values / formulas noted by the original author
float electric; // 3.320637090E+02f;
float gkc; // 2.455f;
float dielec; // 1.0f;
float dwater; // 78.3f;
float fc; // electric * 1.0f * (1.0f-dwater)/(0.0f+1.0f*dwater);
float fd; // electric * 2.0f * (1.0f-dwater)/(1.0f+2.0f*dwater);
float fq; // electric * 3.0f * (1.0f-dwater)/(2.0f+3.0f*dwater);
// PME arrays
float4* pThetai1;
float4* pThetai2;
float4* pThetai3;
int4* pIgrid;
float* pPhi;
float* pPhid;
float* pPhip;
float* pPhidp;
};
#endif
#ifndef __AMOEBA_GPUTYPES_H__
#define __AMOEBA_GPUTYPES_H__
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "kernels/gputypes.h"
#include "OpenMM.h"
#include "openmm/Vec3.h"
#include "amoebaCudaTypes.h"
#include <map>
// Ordered int -> float map and its read-only iterator type.
typedef std::map<int,float> MapIntFloat;
typedef MapIntFloat::const_iterator MapIntFloatCI;
// Host-side state for the Amoeba CUDA code: the base GPU context, the simulation parameter block
// (amoebaSim), a log stream, and the CUDAStream buffers for each force term.
// NOTE(review): ownership of the CUDAStream pointers (who allocates and deletes them) is not visible
// in this header -- presumably amoebaGpuInit()/amoebaGpuShutDown(); confirm.
struct _amoebaGpuContext {
_gpuContext* gpuContext;
cudaAmoebaGmxSimulation amoebaSim;
FILE* log;
// bonds
CUDAStream<int4>* psAmoebaBondID;
CUDAStream<float2>* psAmoebaBondParameter;
// Urey-Bradley
CUDAStream<int4>* psAmoebaUreyBradleyID;
CUDAStream<float2>* psAmoebaUreyBradleyParameter;
// angles
CUDAStream<int4>* psAmoebaAngleID1;
CUDAStream<int2>* psAmoebaAngleID2;
CUDAStream<float2>* psAmoebaAngleParameter;
// in-plane angles
CUDAStream<int4>* psAmoebaInPlaneAngleID1;
CUDAStream<int4>* psAmoebaInPlaneAngleID2;
CUDAStream<float2>* psAmoebaInPlaneAngleParameter;
// torsions
CUDAStream<int4>* psAmoebaTorsionID1;
CUDAStream<int4>* psAmoebaTorsionID2;
CUDAStream<float4>* psAmoebaTorsionParameter1;
CUDAStream<float2>* psAmoebaTorsionParameter2;
// pi-torsions
CUDAStream<int4>* psAmoebaPiTorsionID1;
CUDAStream<int4>* psAmoebaPiTorsionID2;
CUDAStream<int4>* psAmoebaPiTorsionID3;
CUDAStream<float>* psAmoebaPiTorsionParameter;
// stretch bends
CUDAStream<int4>* psAmoebaStretchBendID1;
CUDAStream<int2>* psAmoebaStretchBendID2;
CUDAStream<float4>* psAmoebaStretchBendParameter;
// out-of-plane bends
CUDAStream<int4>* psAmoebaOutOfPlaneBendID1;
CUDAStream<int4>* psAmoebaOutOfPlaneBendID2;
CUDAStream<float>* psAmoebaOutOfPlaneBendParameter;
// torsion torsions
CUDAStream<int4>* psAmoebaTorsionTorsionID1;
CUDAStream<int4>* psAmoebaTorsionTorsionID2;
CUDAStream<int4>* psAmoebaTorsionTorsionID3;
CUDAStream<float4>* psAmoebaTorsionTorsionGrids;
unsigned int workUnits;
// workspace arrays
CUDAStream<float>* psWorkArray_3_1;
CUDAStream<float>* psWorkArray_3_2;
CUDAStream<float>* psWorkArray_3_3;
CUDAStream<float>* psWorkArray_3_4;
CUDAStream<float>* psWorkArray_1_1;
CUDAStream<float>* psWorkArray_1_2;
// scaling indices
CUDAStream<int>* psScalingIndicesIndex;
CUDAStream<int>* ps_D_ScaleIndices;
CUDAStream<int2>* ps_P_ScaleIndices;
CUDAStream<int2>* ps_M_ScaleIndices;
int maxCovalentDegreeSz;
float solventDielectric;
// multipole parameters
CUDAStream<int4>* psMultipoleParticlesIdsAndAxisType;
// buffer indices used for mapping torques onto forces
// NOTE(review): torqueMapForce4Delete presumably records whether psTorqueMapForce4 must be deleted here -- confirm.
int torqueMapForce4Delete;
CUDAStream<int4>* psMultipoleParticlesTorqueBufferIndices;
CUDAStream<float4>* psTorqueMapForce4;
// molecular frame multipoles
CUDAStream<float>* psMolecularDipole;
CUDAStream<float>* psMolecularQuadrupole;
// electrostatic potential grid
CUDAStream<unsigned int>* psPotentialWorkUnit;
CUDAStream<float4>* psPotentialGrid;
CUDAStream<float>* psPotential;
// lab frame multipoles
CUDAStream<float>* psLabFrameDipole;
CUDAStream<float>* psLabFrameQuadrupole;
// scaling-related parameters
CUDAStream<float2>* psDampingFactorAndThole;
// used to setup scaling constants
std::vector<int> covalentDegree;
std::vector<int> polarizationDegree;
// fixed-E field
CUDAStream<float>* psE_Field;
CUDAStream<float>* psE_FieldPolar;
// NOTE(review): multipoleNonbondedMethod presumably holds a CudaAmoebaNonbondedMethod value -- confirm.
int multipoleNonbondedMethod;
double cutoffDistance;
// mutual induced field
int mutualInducedIterativeMethod;
int mutualInducedMaxIterations;
int mutualInducedConverged;
int mutualInducedDone;
int epsilonThreadsPerBlock;
float mutualInducedTargetEpsilon;
float mutualInducedCurrentEpsilon;
CUDAStream<float>* psInducedDipole;
CUDAStream<float>* psInducedDipolePolar;
CUDAStream<float>* psPolarizability;
CUDAStream<float>* psCurrentEpsilon;
// SOR arrays for mutual induced field (fixed capacity of 4 work vectors)
unsigned int numberOfSorWorkVectors;
CUDAStream<float>* psWorkVector[4];
// electrostatic
CUDAStream<float>* psTorque;
// Kirkwood fields
CUDAStream<float>* psGk_Field;
CUDAStream<float>* psInducedDipoleS;
CUDAStream<float>* psInducedDipolePolarS;
CUDAStream<float>* psBorn;
CUDAStream<float>* psBornPolar;
int includeObcCavityTerm;
// Vdw fields
CUDAStream<float2>* psVdwSigmaEpsilon;
CUDAStream<int>* psAmoebaVdwNonReductionID;
CUDAStream<int4>* psAmoebaVdwReductionID;
CUDAStream<float>* psAmoebaVdwReduction;
CUDAStream<float4>* psAmoebaVdwCoordinates;
CUDAStream<unsigned int>* psVdwWorkUnit;
CUDAStream<int>* psVdwExclusionIndicesIndex;
CUDAStream<int>* psVdwExclusionIndices;
int vdwSigmaCombiningRule;
int vdwEpsilonCombiningRule;
std::vector< std::vector<int> > vdwExclusions;
// Wca dispersion fields
CUDAStream<float2>* psWcaDispersionRadiusEpsilon;
// PME fields
CUDAStream<float4>* psThetai1;
CUDAStream<float4>* psThetai2;
CUDAStream<float4>* psThetai3;
CUDAStream<int4>* psIgrid;
CUDAStream<float>* psPhi;
CUDAStream<float>* psPhid;
CUDAStream<float>* psPhip;
CUDAStream<float>* psPhidp;
};
// Handle type used by the function prototypes: a pointer to the context struct.
typedef struct _amoebaGpuContext *amoebaGpuContext;
// Function prototypes
// All functions below are declared with C linkage (unmangled names); several still take C++
// types by reference, so they are callable from C++ only.

// Context lifecycle: create an Amoeba context on top of an existing _gpuContext, print it, shut it down.
extern "C"
amoebaGpuContext amoebaGpuInit( _gpuContext* gpu );
extern "C"
void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext gpu, FILE* log );
extern "C"
void amoebaGpuShutDown(amoebaGpuContext gpu);
// Setup helpers. NOTE(review): the required call order relative to the gpuSet* functions is not visible here -- confirm.
extern "C"
void amoebaGpuBuildOutputBuffers( amoebaGpuContext gpu, int hasKirkwood );
extern "C"
int amoebaGpuBuildThreadBlockWorkList( amoebaGpuContext gpu );
extern "C"
void amoebaGpuBuildScalingList( amoebaGpuContext gpu );
// Bonded-term parameter setters. Each takes parallel vectors with one entry per interaction
// (atom indices plus per-interaction parameters) and, where applicable, global polynomial factors.
// NOTE(review): the vectors are presumably required to have equal lengths -- confirm in the implementation.

// bonds
extern "C"
void gpuSetAmoebaBondParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2,
const std::vector<float>& length, const std::vector<float>& k, float cubic, float quartic);
// Urey-Bradley
extern "C"
void gpuSetAmoebaUreyBradleyParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2,
const std::vector<float>& length, const std::vector<float>& k, float cubic, float quartic);
// angles
extern "C"
void gpuSetAmoebaAngleParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3,
const std::vector<float>& angle, const std::vector<float>& k, float cubicK,
float quarticK, float penticK, float sexticK);
// in-plane angles
extern "C"
void gpuSetAmoebaInPlaneAngleParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2,
const std::vector<int>& atom3, const std::vector<int>& atom4,
const std::vector<float>& angle, const std::vector<float>& k, float cubicK,
float quarticK, float penticK, float sexticK);
// torsions
extern "C"
void gpuSetAmoebaTorsionParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2,
const std::vector<int>& atom3, const std::vector<int>& atom4,
const std::vector< std::vector<float> >& torsion1,
const std::vector< std::vector<float> >& torsion2,
const std::vector< std::vector<float> >& torsion3 );
// pi-torsions (six atoms per interaction)
extern "C"
void gpuSetAmoebaPiTorsionParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2,
const std::vector<int>& atom3, const std::vector<int>& atom4,
const std::vector<int>& atom5, const std::vector<int>& atom6,
const std::vector<float>& torsion1 );
// stretch bends
extern "C"
void gpuSetAmoebaStretchBendParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3,
const std::vector<float>& lengthAB,
const std::vector<float>& lengthCB,
const std::vector<float>& angle,
const std::vector<float>& k );
// out-of-plane bends
extern "C"
void gpuSetAmoebaOutOfPlaneBendParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3,
const std::vector<int>& atom4, const std::vector<float>& k,
float cubicK, float quarticK, float penticK, float sexticK );
// torsion torsions (five atoms per interaction plus chiral atom index and grid index)
extern "C"
void gpuSetAmoebaTorsionTorsionParameters(amoebaGpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3,
const std::vector<int>& atom4, const std::vector<int>& atom5, const std::vector<int>& chiralAtomIndex, const std::vector<int>& gridIndex );
// torsion torsion grids, passed as a four-level nested vector of floats
extern "C"
void gpuSetAmoebaTorsionTorsionGrids(amoebaGpuContext gpu, const std::vector< std::vector< std::vector< std::vector<float> > > >& floatGrids );
// Nonbonded-term setters and related setup routines.

// multipoles: per-particle charges/dipoles/quadrupoles, axis definitions, Thole/damping/polarity
// values, covalent maps, and mutual-induction / nonbonded-method settings
extern "C"
void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vector<float>& charges, const std::vector<float>& dipoles, const std::vector<float>& quadrupoles,
const std::vector<int>& axisType, const std::vector<int>& multipoleAtomZ, const std::vector<int>& multipoleAtomX, const std::vector<int>& multipoleAtomY,
const std::vector<float>& tholes, float scalingDistanceCutoff,const std::vector<float>& dampingFactors, const std::vector<float>& polarity,
const std::vector< std::vector< std::vector<int> > >& multipoleAtomCovalentInfo, const std::vector<int>& covalentDegree,
const std::vector<int>& minCovalentIndices, const std::vector<int>& minCovalentPolarizationIndices, int maxCovalentRange,
int mutualInducedIterationMethod, int mutualInducedMaxIterations, float mutualInducedTargetEpsilon,
int nonbondedMethod, int polarizationType, float cutoffDistance, float alphaEwald );
// electrostatic potential on a caller-supplied grid: setup, result retrieval, cleanup
extern "C"
void gpuSetupElectrostaticPotentialCalculation( amoebaGpuContext amoebaGpu, const std::vector< OpenMM::Vec3 >& inputGrid );
extern "C"
void gpuLoadElectrostaticPotential( amoebaGpuContext amoebaGpu, unsigned int gridSize, std::vector< double >& outputElectrostaticPotential );
extern "C"
void gpuCleanupElectrostaticPotentialCalculation( amoebaGpuContext amoebaGpu );
// implicit solvent: OBC and Grycuk variants share the same parameter list
extern "C"
void gpuSetAmoebaObcParameters( amoebaGpuContext amoebaGpu , float innerDielectric, float solventDielectric,
const std::vector<float>& radius, const std::vector<float>& scale, const std::vector<float>& charge,
int includeCavityTerm, float probeRadius, float surfaceAreaFactor);
extern "C"
void gpuSetAmoebaGrycukParameters( amoebaGpuContext amoebaGpu , float innerDielectric, float solventDielectric,
const std::vector<float>& radius, const std::vector<float>& scale, const std::vector<float>& charge,
int includeCavityTerm, float probeRadius, float surfaceAreaFactor);
// vdw: combining rules are passed as strings
// NOTE(review): the accepted rule names are not visible in this header -- confirm in the implementation.
extern "C"
void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
const std::vector<int>& indexIVs,
const std::vector<float>& sigmas,
const std::vector<float>& epsilons,
const std::vector<float>& reductions,
const std::string& sigmaCombiningRule,
const std::string& epsilonCombiningRule,
const std::vector< std::vector<int> >& allExclusions, int usePBC, float cutoff );
// PME
extern "C"
void gpuSetAmoebaPMEParameters(amoebaGpuContext amoebaGpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ);
extern "C"
void amoebaGpuBuildVdwExclusionList( amoebaGpuContext amoebaGpu );
// WCA dispersion
extern "C"
void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
const std::vector<float>& radii,
const std::vector<float>& epsilons,
const float totalMaxWcaDisperionEnergy,
const float epso, const float epsh, const float rmino, const float rminh,
const float awater, const float shctd, const float dispoff );
// constants and offsets
// NOTE(review): the meaning of updateFlag is not visible in this header -- confirm.
extern "C"
void amoebaGpuSetConstants(amoebaGpuContext gpu, int updateFlag );
extern "C"
void gpuSetAmoebaBondOffsets(amoebaGpuContext gpu);
#endif //__AMOEBA_GPUTYPES_H__
#ifndef __AMOEBA_SCALE_FACTORS_H__
#define __AMOEBA_SCALE_FACTORS_H__
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
// Five-entry scale-factor tables stored in CUDA constant memory (one copy per
// translation unit, since they are declared static in a header).
// NOTE(review): nothing in this header reads these tables; the meaning of the
// index (presumably a bonded-separation class) should be confirmed against the
// kernels that use them.
static __constant__ float mpoleScale[5] = { 0.0f, 0.0f, 0.0f, 0.4f, 0.8f };
static __constant__ float polarScale[5] = { 0.0f, 0.0f, 0.0f, 1.0f, 1.0f };
static __constant__ float directScale[5] = { 0.0f, 1.0f, 1.0f, 1.0f, 1.0f };
//float mutualScale[5] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
// must be explicitly initialized!
//static __constant__ float mScale[4] = { 0.0f, 0.4f, 0.8f, 1.0f };
//static __constant__ float pScale[4] = { 1.0f, 0.5f, 0.0f, -2.0f };
//static __constant__ float dScale[2] = { 0.0f, 1.0f };
//static __constant__ float uScale[5] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
// subroutine to get masked scale factors
// Look up the d-scale factor for one bit position of a packed mask.
//   gridIndex  bit position to test (0..31)
//   scaleMask  packed mask; a set bit selects a scale factor of 0
//   dScale     output: 0.0f if the bit is set, 1.0f otherwise
__device__ static void getMaskedDScaleFactor( unsigned int gridIndex, int scaleMask, float* dScale )
{
    // Shift an unsigned one: with a signed literal, gridIndex == 31 would
    // overflow the int before the value is converted to unsigned.
    const unsigned int bit = 1u << gridIndex;
    *dScale = (static_cast<unsigned int>(scaleMask) & bit) ? 0.0f : 1.0f;
}
// Look up the p-scale factor for one bit position of a pair of packed masks.
//   gridIndex  bit position to test (0..31)
//   scaleMask  .x: a set bit contributes a factor of 0.5
//              .y: a set bit contributes a factor of 0 (overrides .x)
//   pScale     output: product of the two factors (1.0f when neither bit is set)
__device__ static void getMaskedPScaleFactor( unsigned int gridIndex, int2 scaleMask, float* pScale )
{
    // Shift an unsigned one: with a signed literal, gridIndex == 31 would
    // overflow the int before the value is converted to unsigned.
    const unsigned int bit = 1u << gridIndex;
    float factor = (static_cast<unsigned int>(scaleMask.x) & bit) ? 0.5f : 1.0f;
    factor      *= (static_cast<unsigned int>(scaleMask.y) & bit) ? 0.0f : 1.0f;
    *pScale      = factor;
}
// Look up the m-scale factor for one bit position of a pair of packed masks.
//   gridIndex  bit position to test (0..31)
//   scaleMask  two packed masks (.x and .y), decoded as shown in the table below
//   mScale     output scale factor
//
//   x-bit  y-bit  ->  *mScale
//     0      0    ->   1.0
//     1      0    ->   0.8
//     0      1    ->   0.4
//     1      1    ->   0.0
// NOTE(review): an earlier, unlabeled version of this table listed 0.4 before 0.8;
// the table above reflects what the code computes. Confirm which mask the
// callers fill for which case.
__device__ static void getMaskedMScaleFactor( unsigned int gridIndex, int2 scaleMask, float* mScale )
{
    // Shift an unsigned one: with a signed literal, gridIndex == 31 would
    // overflow the int before the value is converted to unsigned.
    const unsigned int bit = 1u << gridIndex;
    const bool xSet = (static_cast<unsigned int>(scaleMask.x) & bit) != 0;
    const bool ySet = (static_cast<unsigned int>(scaleMask.y) & bit) != 0;

    float factor = (xSet && ySet) ? 0.0f : 1.0f;
    factor      *= xSet ? 0.8f : 1.0f;
    factor      *= ySet ? 0.4f : 1.0f;
    *mScale      = factor;
}
// subroutine to get cell coordinates
// Unpack a cell id into its two coordinates and its exclusion flag.
//   bit 0        -> *exclusions
//   bits 2..16   -> *y (15 bits), shifted left by GRIDBITS
//   bits 17..31  -> *x, shifted left by GRIDBITS
// Bit 1 of cellId is not used here.
__device__ static void decodeCell( unsigned int cellId, unsigned int* x, unsigned int* y, bool* exclusions )
{
    const unsigned int yField = (cellId >> 2) & 0x7fff;
    const unsigned int xField = cellId >> 17;

    *y          = yField << GRIDBITS;
    *exclusions = (cellId & 0x1);
    *x          = xField << GRIDBITS;
}
// Accumulate a 3-component vector into a flat float buffer:
// outputForce[offset + k] += forceSum[k] for k = 0, 1, 2.
__device__ static void load3dArrayBufferPerWarp( unsigned int offset, float* forceSum, float* outputForce )
{
    float* target = outputForce + offset;
    for( int component = 0; component < 3; component++ ){
        target[component] += forceSum[component];
    }
}
// Add a 3-component vector to the x/y/z members of outputForce[offset];
// the w member is carried through unchanged.
__device__ static void add3dArrayToFloat4( unsigned int offset, volatile float* forceSum, float4* outputForce )
{
    float4* target = outputForce + offset;

    // read-modify-write of the whole float4 element
    float4 accumulated = *target;
    accumulated.x += forceSum[0];
    accumulated.y += forceSum[1];
    accumulated.z += forceSum[2];
    *target = accumulated;
}
// Overwrite outputForce[offset] with (forceSum[0], forceSum[1], forceSum[2], 0).
__device__ static void load3dArrayToFloat4( unsigned int offset, float* forceSum, float4* outputForce )
{
    const float4 packed = { forceSum[0], forceSum[1], forceSum[2], 0.0f };
    outputForce[offset] = packed;
}
// Copy a 3-component vector into a flat float buffer:
// outputForce[offset + k] = forceSum[k] for k = 0, 1, 2.
__device__ static void load3dArray( unsigned int offset, volatile float* forceSum, float* outputForce )
{
    float* target = outputForce + offset;
    for( int component = 0; component < 3; component++ ){
        target[component] = forceSum[component];
    }
}
// Accumulate a 3-component vector into a flat float buffer:
// outputForce[offset + k] += forceSum[k] for k = 0, 1, 2.
__device__ static void add3dArray( unsigned int offset, volatile float* forceSum, float* outputForce )
{
    float* target = outputForce + offset;
    for( int component = 0; component < 3; component++ ){
        target[component] += forceSum[component];
    }
}
// Multiply the three components of force in place by scaleFactor.
__device__ static void scale3dArray( float scaleFactor, float* force )
{
    for( int component = 0; component < 3; component++ ){
        force[component] *= scaleFactor;
    }
}
#endif //__AMOEBA_SCALE_FACTORS_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaCudaKernels.h"
#include "openmm/OpenMMException.h"
#include <stdio.h>
using namespace std;
/**
 * Compute the system's net charge, dipole and (traceless) quadrupole on the host.
 *
 * Positions/charges (psPosq4), velm (psVelm4), lab-frame dipoles and quadrupoles and the
 * induced dipoles are downloaded, the coordinates are shifted to the center of mass,
 * and the moments are accumulated in single precision.
 *
 * velm[i].w is interpreted as an inverse mass: mass = 1/w when w > 0, otherwise 0.
 *
 * @param amoebaGpu               AMOEBA GPU context
 * @param outputMultipoleMoments  resized to 13 entries:
 *                                [0]      net charge
 *                                [1..3]   dipole x, y, z            (multiplied by 4.80321)
 *                                [4..12]  quadrupole xx, xy, xz, yx, yy, yz, zx, zy, zz
 *                                         (multiplied by 4.80321)
 */
void kCalculateAmoebaSystemMultipoleMoments( amoebaGpuContext amoebaGpu, std::vector< double >& outputMultipoleMoments )
{
    // setup
    kSetupAmoebaMultipoleForces(amoebaGpu, false );
    gpuContext gpu = amoebaGpu->gpuContext;

    gpu->psPosq4->Download();
    gpu->psVelm4->Download();
    float4* posq = gpu->psPosq4->_pSysData;
    float4* velm = gpu->psVelm4->_pSysData;

    // center of mass
    float totalMass       = 0.0f;
    float centerOfMass[3] = { 0.0f, 0.0f, 0.0f };
    for( unsigned int ii = 0; ii < gpu->natoms; ii++ ){
        // Test this atom's own inverse mass. The previous code tested velm->w
        // (atom 0) for every atom, so an atom with w == 0 could produce 1/0.
        const float inverseMass = velm[ii].w;
        const float mass        = (inverseMass > 0.0f) ? 1.0f/inverseMass : 0.0f;
        totalMass       += mass;
        centerOfMass[0] += mass*posq[ii].x;
        centerOfMass[1] += mass*posq[ii].y;
        centerOfMass[2] += mass*posq[ii].z;
    }

    std::vector<float4> posqLocal(gpu->natoms);
    if( totalMass > 0.0f ){
        centerOfMass[0] /= totalMass;
        centerOfMass[1] /= totalMass;
        centerOfMass[2] /= totalMass;
    }

    // coordinates relative to the center of mass; w keeps the charge
    for( unsigned int ii = 0; ii < gpu->natoms; ii++ ){
        posqLocal[ii].x = posq[ii].x - centerOfMass[0];
        posqLocal[ii].y = posq[ii].y - centerOfMass[1];
        posqLocal[ii].z = posq[ii].z - centerOfMass[2];
        posqLocal[ii].w = posq[ii].w;
    }

    float netchg = 0.0f;
    float xdpl   = 0.0f;
    float ydpl   = 0.0f;
    float zdpl   = 0.0f;
    float xxqdp  = 0.0f;
    float xyqdp  = 0.0f;
    float xzqdp  = 0.0f;
    float yxqdp  = 0.0f;
    float yyqdp  = 0.0f;
    float yzqdp  = 0.0f;
    float zxqdp  = 0.0f;
    float zyqdp  = 0.0f;
    float zzqdp  = 0.0f;

    amoebaGpu->psLabFrameDipole->Download();
    float* labFrameDipole     = amoebaGpu->psLabFrameDipole->_pSysData;
    amoebaGpu->psInducedDipole->Download();
    float* inducedDipole      = amoebaGpu->psInducedDipole->_pSysData;
    amoebaGpu->psLabFrameQuadrupole->Download();
    float* labFrameQuadrupole = amoebaGpu->psLabFrameQuadrupole->_pSysData;

    // charge and (permanent + induced) dipole contributions
    for( unsigned int ii = 0; ii < gpu->natoms; ii++ ){
        netchg += posqLocal[ii].w;

        float netDipoleX = (labFrameDipole[3*ii] + inducedDipole[3*ii]);
        float netDipoleY = (labFrameDipole[3*ii+1] + inducedDipole[3*ii+1]);
        float netDipoleZ = (labFrameDipole[3*ii+2] + inducedDipole[3*ii+2]);

        xdpl += posqLocal[ii].x*posqLocal[ii].w + netDipoleX;
        ydpl += posqLocal[ii].y*posqLocal[ii].w + netDipoleY;
        zdpl += posqLocal[ii].z*posqLocal[ii].w + netDipoleZ;

        xxqdp += posqLocal[ii].x*posqLocal[ii].x*posqLocal[ii].w + 2.0f*posqLocal[ii].x*netDipoleX;
        xyqdp += posqLocal[ii].x*posqLocal[ii].y*posqLocal[ii].w + posqLocal[ii].x*netDipoleY + posqLocal[ii].y*netDipoleX;
        xzqdp += posqLocal[ii].x*posqLocal[ii].z*posqLocal[ii].w + posqLocal[ii].x*netDipoleZ + posqLocal[ii].z*netDipoleX;
        yxqdp += posqLocal[ii].y*posqLocal[ii].x*posqLocal[ii].w + posqLocal[ii].y*netDipoleX + posqLocal[ii].x*netDipoleY;
        yyqdp += posqLocal[ii].y*posqLocal[ii].y*posqLocal[ii].w + 2.0f*posqLocal[ii].y*netDipoleY;
        yzqdp += posqLocal[ii].y*posqLocal[ii].z*posqLocal[ii].w + posqLocal[ii].y*netDipoleZ + posqLocal[ii].z*netDipoleY;
        zxqdp += posqLocal[ii].z*posqLocal[ii].x*posqLocal[ii].w + posqLocal[ii].z*netDipoleX + posqLocal[ii].x*netDipoleZ;
        zyqdp += posqLocal[ii].z*posqLocal[ii].y*posqLocal[ii].w + posqLocal[ii].z*netDipoleY + posqLocal[ii].y*netDipoleZ;
        zzqdp += posqLocal[ii].z*posqLocal[ii].z*posqLocal[ii].w + 2.0f*posqLocal[ii].z*netDipoleZ;
    }

    // convert the quadrupole from traced to traceless form
    float qave = (xxqdp + yyqdp + zzqdp)/3.0f;
    xxqdp = 1.5f*(xxqdp-qave);
    xyqdp = 1.5f*xyqdp;
    xzqdp = 1.5f*xzqdp;
    yxqdp = 1.5f*yxqdp;
    yyqdp = 1.5f*(yyqdp-qave);
    yzqdp = 1.5f*yzqdp;
    zxqdp = 1.5f*zxqdp;
    zyqdp = 1.5f*zyqdp;
    zzqdp = 1.5f*(zzqdp-qave);

    // add the traceless atomic quadrupoles to total quadrupole
    for( unsigned int ii = 0; ii < gpu->natoms; ii++ ){
        xxqdp = xxqdp + 3.0f*labFrameQuadrupole[9*ii];
        xyqdp = xyqdp + 3.0f*labFrameQuadrupole[9*ii+1];
        xzqdp = xzqdp + 3.0f*labFrameQuadrupole[9*ii+2];
        yxqdp = yxqdp + 3.0f*labFrameQuadrupole[9*ii+3];
        yyqdp = yyqdp + 3.0f*labFrameQuadrupole[9*ii+4];
        yzqdp = yzqdp + 3.0f*labFrameQuadrupole[9*ii+5];
        zxqdp = zxqdp + 3.0f*labFrameQuadrupole[9*ii+6];
        zyqdp = zyqdp + 3.0f*labFrameQuadrupole[9*ii+7];
        zzqdp = zzqdp + 3.0f*labFrameQuadrupole[9*ii+8];
    }

    // unit conversion factor applied to the dipole and quadrupole entries
    float debye = 4.80321f;

    outputMultipoleMoments.resize( 13 );
    outputMultipoleMoments[0]  = netchg;
    outputMultipoleMoments[1]  = xdpl*debye;
    outputMultipoleMoments[2]  = ydpl*debye;
    outputMultipoleMoments[3]  = zdpl*debye;
    outputMultipoleMoments[4]  = xxqdp*debye;
    outputMultipoleMoments[5]  = xyqdp*debye;
    outputMultipoleMoments[6]  = xzqdp*debye;
    outputMultipoleMoments[7]  = yxqdp*debye;
    outputMultipoleMoments[8]  = yyqdp*debye;
    outputMultipoleMoments[9]  = yzqdp*debye;
    outputMultipoleMoments[10] = zxqdp*debye;
    outputMultipoleMoments[11] = zyqdp*debye;
    outputMultipoleMoments[12] = zzqdp*debye;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include <stdio.h>
// Per-translation-unit copies of the simulation parameter structs in CUDA constant memory.
// They are written by SetCalculateAmoebaElectrostaticSim() and read back by
// GetCalculateAmoebaElectrostaticSim(); device code in this file reads them directly.
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
// Copy the host-side simulation structs (gpu->sim and amoebaGpu->amoebaSim) into this
// file's constant-memory symbols cSim and cAmoebaSim. Each copy is checked with RTERROR.
void SetCalculateAmoebaElectrostaticSim(amoebaGpuContext amoebaGpu)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    cudaError_t status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaElectrostaticSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");

    status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaElectrostaticSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
// Copy this file's constant-memory symbols cSim and cAmoebaSim back into the host-side
// structs gpu->sim and amoebaGpu->amoebaSim. Each copy is checked with RTERROR.
void GetCalculateAmoebaElectrostaticSim(amoebaGpuContext amoebaGpu)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    cudaError_t status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");

    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
// Positions of the p-, d-, u- and m-scale factors inside the scalingFactors array
// handed to the pair-interaction routines (e.g. scalingFactors[MScaleIndex]).
// NOTE(review): LastScalingIndex is presumably the number of such slots; confirm against
// the code that allocates the scalingFactors array.
static int const PScaleIndex = 0;
static int const DScaleIndex = 1;
static int const UScaleIndex = 2;
static int const MScaleIndex = 3;
static int const LastScalingIndex = 4;
// Per-atom data used by the electrostatic pair-interaction code in this file.
// Every field except force is filled by loadElectrostaticParticle(); force is
// reset by zeroElectrostaticParticle().
struct ElectrostaticParticle {
// coordinates and charge (copied from the x, y, z, w members of cSim.pPosq)
float x;
float y;
float z;
float q;
// lab frame dipole (3 components)
float labFrameDipole[3];
// lab frame quadrupole (9 entries, stored contiguously per atom)
float labFrameQuadrupole[9];
// induced dipole
float inducedDipole[3];
// polar induced dipole
float inducedDipoleP[3];
// Thole and damping parameters
// (cAmoebaSim.pDampingFactorAndThole: .y -> thole, .x -> damp)
float thole;
float damp;
// force accumulator
float force[3];
//float torque[3];
//float padding;
};
#ifdef Original
#define i35 0.257142857f
#define DOT3_4(u,v) ((u[0])*(v[0]) + (u[1])*(v[1]) + (u[2])*(v[2]))
#define MATRIXDOT31(u,v) u[0]*v[0] + u[1]*v[1] + u[2]*v[2] + \
u[3]*v[3] + u[4]*v[4] + u[5]*v[5] + \
u[6]*v[6] + u[7]*v[7] + u[8]*v[8]
#define DOT31(u,v) ((u[0])*(v[0]) + (u[1])*(v[1]) + (u[2])*(v[2]))
#define one 1.0f
// Reference implementation of the electrostatic interaction between two particles.
// It is only compiled when the enclosing '#ifdef Original' is active.
//
//   atomI, atomJ    the two interacting particles
//   scalingFactors  read for the P/D/U/M scale factors and the Scale3/5/7/9 and Ddsc30/50/70
//                   slots; when the damping branch below is taken, the Scale3/5/7/9 and
//                   Ddsc30/50/70 slots are overwritten
//   outputForce     x, y, z receive -(permanent + induced) force terms; w receives the energy (em + ei)
//   outputTorque    [0] receives ttm2 + ttm2i, [1] receives ttm3 + ttm3i (x, y, z only; w is not written)
//
// NOTE(review): the Scale*Index / Ddsc*Index constants and the amatrixProductVector3 /
// acrossProductVector3 / amatrixCrossProductMatrix3 helpers are not defined in this
// file section; confirm they are still available before enabling this code.
__device__ void calculateElectrostaticPairIxnOrig_kernel( ElectrostaticParticle& atomI, ElectrostaticParticle& atomJ,
float* scalingFactors, float4* outputForce, float4 outputTorque[2]){
float deltaR[3];
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
// views into scalingFactors holding the three components of the damping derivatives
float* ddsc3 = scalingFactors + Ddsc30Index;
float* ddsc5 = scalingFactors + Ddsc50Index;
float* ddsc7 = scalingFactors + Ddsc70Index;
// separation vector from atom I to atom J and the inverse-distance powers
// (rr5..rr11 carry the accumulated factors 3, 3*5, 3*5*7, 3*5*7*9)
deltaR[0] = atomJ.x - atomI.x;
deltaR[1] = atomJ.y - atomI.y;
deltaR[2] = atomJ.z - atomI.z;
float r2 = DOT31( deltaR, deltaR );
float r = sqrtf( r2 );
float rr1 = 1.0f/r;
float rr2 = rr1*rr1;
float rr3 = rr1*rr2;
float rr5 = 3.0f*rr3*rr2;
float rr7 = 5.0f*rr5*rr2;
float rr9 = 7.0f*rr7*rr2;
float rr11 = 9.0f*rr9*rr2;
//-------------------------------------------
// Damping: applied only when both atoms have a non-zero damp value and the pair is
// closer than cAmoebaSim.scalingDistanceCutoff.
// NOTE(review): '0.0' below is a double literal, unlike the '0.0f' next to it.
if( atomI.damp != 0.0f && atomJ.damp != 0.0 && r < cAmoebaSim.scalingDistanceCutoff ){
float distanceIJ, r2I;
distanceIJ = r;
r2I = rr2;
float ratio = distanceIJ/(atomI.damp*atomJ.damp);
// use the smaller of the two thole values
float pGamma = atomJ.thole > atomI.thole ? atomI.thole : atomJ.thole;
float damp = ratio*ratio*ratio*pGamma;
float dampExp = expf( -damp );
float damp1 = damp + one;
float damp2 = damp*damp;
float damp3 = damp2*damp;
scalingFactors[Scale3Index] = one - dampExp;
scalingFactors[Scale5Index] = one - damp1*dampExp;
scalingFactors[Scale7Index] = one - ( damp1 + 0.6f*damp2)*dampExp;
scalingFactors[Scale9Index] = one - ( damp1 + ( 2.0f*damp2 + damp3 )*i35)*dampExp;
float factor = 3.0f*damp*dampExp*r2I;
float factor7 = -0.2f + 0.6f*damp;
for( int ii = 0; ii < 3; ii++ ){
scalingFactors[Ddsc30Index + ii] = factor*deltaR[ii];
scalingFactors[Ddsc50Index + ii] = scalingFactors[Ddsc30Index + ii]*damp;
scalingFactors[Ddsc70Index + ii] = scalingFactors[Ddsc50Index + ii]*factor7;
}
}
// combine the Scale3/Scale3+1/Scale3+2 slots with the U, D and P scale factors
// NOTE(review): 'Scale3Index+1' / '+2' assume the Scale5/Scale7 slots directly follow Scale3 -- confirm.
float scaleI0 = scalingFactors[Scale3Index]*scalingFactors[UScaleIndex];
float dsc0 = scalingFactors[Scale3Index]*scalingFactors[DScaleIndex];
float psc0 = scalingFactors[Scale3Index]*scalingFactors[PScaleIndex];
float scaleI1 = scalingFactors[Scale3Index+1]*scalingFactors[UScaleIndex];
float dsc1 = scalingFactors[Scale3Index+1]*scalingFactors[DScaleIndex];
float psc1 = scalingFactors[Scale3Index+1]*scalingFactors[PScaleIndex];
float dsc2 = scalingFactors[Scale3Index+2]*scalingFactors[DScaleIndex];
float psc2 = scalingFactors[Scale3Index+2]*scalingFactors[PScaleIndex];
// quadrupole-times-separation products and the scalar products built from them
float qIr[3], qJr[3];
amatrixProductVector3( atomJ.labFrameQuadrupole, deltaR, qJr);
amatrixProductVector3( atomI.labFrameQuadrupole, deltaR, qIr);
float sc2 = DOT3_4( atomI.labFrameDipole, atomJ.labFrameDipole );
float sc3 = DOT3_4( atomI.labFrameDipole, deltaR );
float sc4 = DOT3_4( atomJ.labFrameDipole, deltaR );
float sc5 = DOT3_4( qIr, deltaR );
float sc6 = DOT3_4( qJr, deltaR );
float sc7 = DOT3_4( qIr, atomJ.labFrameDipole );
float sc8 = DOT3_4( qJr, atomI.labFrameDipole );
float sc9 = DOT3_4( qIr, qJr );
float sc10 = MATRIXDOT31( atomI.labFrameQuadrupole, atomJ.labFrameQuadrupole );
// scalar products involving the induced dipoles (sci*) and polar induced dipoles (scip*)
float sci1 = DOT3_4( atomI.inducedDipole, atomJ.labFrameDipole ) +
DOT3_4( atomJ.inducedDipole, atomI.labFrameDipole );
float sci3 = DOT3_4( atomI.inducedDipole, deltaR );
float sci4 = DOT3_4( atomJ.inducedDipole, deltaR );
float sci7 = DOT3_4( qIr, atomJ.inducedDipole );
float sci8 = DOT3_4( qJr, atomI.inducedDipole );
float scip1 = DOT3_4( atomI.inducedDipoleP, atomJ.labFrameDipole ) +
DOT3_4( atomJ.inducedDipoleP, atomI.labFrameDipole );
float scip2 = DOT3_4( atomI.inducedDipole, atomJ.inducedDipoleP) +
DOT3_4( atomJ.inducedDipole, atomI.inducedDipoleP);
float scip3 = DOT3_4( atomI.inducedDipoleP, deltaR );
float scip4 = DOT3_4( atomJ.inducedDipoleP, deltaR );
float scip7 = DOT3_4( qIr, atomJ.inducedDipoleP );
float scip8 = DOT3_4( qJr, atomI.inducedDipoleP );
float scaleF = 0.5f*scalingFactors[UScaleIndex];
float inducedFactor3 = scip2*rr3*scaleF;
float inducedFactor5 = (sci3*scip4+scip3*sci4)*rr5*scaleF;
float findmp_0 = inducedFactor3*ddsc3[0] - inducedFactor5*ddsc5[0];
float findmp_1 = inducedFactor3*ddsc3[1] - inducedFactor5*ddsc5[1];
float findmp_2 = inducedFactor3*ddsc3[2] - inducedFactor5*ddsc5[2];
float gli1 = atomJ.q*sci3 - atomI.q*sci4;
float gli2 = -sc3*sci4 - sci3*sc4;
float gli3 = sci3*sc6 - sci4*sc5;
float gli6 = sci1;
float gli7 = 2.0f*(sci7-sci8);
float glip1 = atomJ.q*scip3 - atomI.q*scip4;
float glip2 = -sc3*scip4 - scip3*sc4;
float glip3 = scip3*sc6 - scip4*sc5;
float glip6 = scip1;
float glip7 = 2.0f*(scip7-scip8);
float factor3 = rr3*(( gli1 + gli6)*scalingFactors[PScaleIndex] + (glip1 + glip6)*scalingFactors[DScaleIndex]);
float factor5 = rr5*(( gli2 + gli7)*scalingFactors[PScaleIndex] + (glip2 + glip7)*scalingFactors[DScaleIndex]);
float factor7 = rr7*( gli3*scalingFactors[PScaleIndex] + glip3*scalingFactors[DScaleIndex]);
float fridmp_0 = 0.5f*(factor3*ddsc3[0] + factor5*ddsc5[0] + factor7*ddsc7[0]);
float fridmp_1 = 0.5f*(factor3*ddsc3[1] + factor5*ddsc5[1] + factor7*ddsc7[1]);
float fridmp_2 = 0.5f*(factor3*ddsc3[2] + factor5*ddsc5[2] + factor7*ddsc7[2]);
// permanent-multipole interaction terms
float gl0 = atomI.q*atomJ.q;
float gl1 = atomJ.q*sc3 - atomI.q*sc4;
float gl2 = atomI.q*sc6 + atomJ.q*sc5 - sc3*sc4;
float gl3 = sc3*sc6 - sc4*sc5;
float gl4 = sc5*sc6;
float gl6 = sc2;
float gl7 = 2.0f*(sc7-sc8);
float gl8 = 2.0f*sc10;
float gl5 = -4.0f*sc9;
float gf1 = rr3*gl0 + rr5*(gl1+gl6) + rr7*(gl2+gl7+gl8) + rr9*(gl3+gl5) + rr11*gl4;
float gf2 = -atomJ.q*rr3 + sc4*rr5 - sc6*rr7;
float gf3 = atomI.q*rr3 + sc3*rr5 + sc5*rr7;
float gf4 = 2.0f*rr5;
float gf5 = 2.0f*(-atomJ.q*rr5+sc4*rr7-sc6*rr9);
float gf6 = 2.0f*(-atomI.q*rr5-sc3*rr7-sc5*rr9);
float gf7 = 4.0f*rr7;
// energy
// em is scaled by the m-scale factor; ei uses the p-scaled damping factors
float em = scalingFactors[MScaleIndex]*(rr1*gl0 + rr3*(gl1+gl6) + rr5*(gl2+gl7+gl8) + rr7*(gl3+gl5) + rr9*gl4);
float ei = 0.5f*(rr3*(gli1+gli6)*psc0 + rr5*(gli2+gli7)*psc1 + rr7*gli3*psc2);
outputForce->w = em+ei;
float temp1[3],temp2[3],temp3[3];
float qIqJr[3], qJqIr[3], qIdJ[3], qJdI[3];
amatrixProductVector3( atomI.labFrameQuadrupole, atomJ.labFrameDipole, qIdJ );//MK
amatrixProductVector3( atomJ.labFrameQuadrupole, atomI.labFrameDipole, qJdI );//MK
amatrixProductVector3( atomI.labFrameQuadrupole, qJr, qIqJr );//MK
amatrixProductVector3( atomJ.labFrameQuadrupole, qIr, qJqIr );//MK
// temp1 and temp2 are computed from the same inputs as qJqIr and qJdI above
amatrixProductVector3( atomJ.labFrameQuadrupole, qIr, temp1 );
amatrixProductVector3( atomJ.labFrameQuadrupole, atomI.labFrameDipole, temp2 );
// permanent force, one expression per component
float ftm2_0 = gf1*deltaR[0] +
gf2*atomI.labFrameDipole[0] + gf3*atomJ.labFrameDipole[0] +
gf4*(temp2[0] - qIdJ[0]) +
gf5*qIr[0] + gf6*qJr[0] +
gf7*(qIqJr[0] + temp1[0]);
float ftm2_1 = gf1*deltaR[1] +
gf2*atomI.labFrameDipole[1] + gf3*atomJ.labFrameDipole[1] +
gf4*(temp2[1] - qIdJ[1]) +
gf5*qIr[1] + gf6*qJr[1] +
gf7*(qIqJr[1] + temp1[1]);
float ftm2_2 = gf1*deltaR[2] +
gf2*atomI.labFrameDipole[2] + gf3*atomJ.labFrameDipole[2] +
gf4*(temp2[2] - qIdJ[2]) +
gf5*qIr[2] + gf6*qJr[2] +
gf7*(qIqJr[2] + temp1[2]);
// get the induced force;
// intermediate variables for the induced-permanent terms;
float gfi1 = rr5*0.5f*((gli1+gli6)*psc0 + (glip1+glip6)*dsc0 + scip2*scaleI0) + rr7*((gli7+gli2)*psc1 + (glip7+glip2)*dsc1 -
(sci3*scip4+scip3*sci4)*scaleI1)*0.5f + rr9*(gli3*psc2+glip3*dsc2)*0.5f;
float gfi4 = 2.0f*rr5;
float gfi5 = rr7* (sci4*psc2 + scip4*dsc2);
float gfi6 = -rr7*(sci3*psc2 + scip3*dsc2);
// scratch vectors; they are reused for different products in the torque sections below
float temp4[3];
float temp5[3];
float temp6[3];
float temp7[3];
float temp8[3];
float temp9[3];
float temp10[3];
float temp11[3];
float temp12[3];
float temp13[3];
float temp14[3];
float temp15[3];
float qIuJp[3], qJuIp[3];
float qIuJ[3], qJuI[3];
// temp4 / temp5 are computed from the same inputs as qJuIp / qJuI
amatrixProductVector3(atomJ.labFrameQuadrupole, atomI.inducedDipoleP, temp4);
amatrixProductVector3(atomI.labFrameQuadrupole, atomJ.inducedDipoleP, qIuJp);//MK
amatrixProductVector3(atomJ.labFrameQuadrupole, atomI.inducedDipoleP, qJuIp);//MK
amatrixProductVector3(atomJ.labFrameQuadrupole, atomI.inducedDipole , qJuI);//MK
amatrixProductVector3(atomJ.labFrameQuadrupole, atomI.inducedDipole, temp5);
amatrixProductVector3(atomI.labFrameQuadrupole, atomJ.inducedDipole , qIuJ);//MK
// induced force, one expression per component
float ftm2i_0 = gfi1*deltaR[0] +
0.5f*(-rr3*atomJ.q*(atomI.inducedDipole[0]*psc0 + atomI.inducedDipoleP[0]*dsc0) +
rr5*sc4*(atomI.inducedDipole[0]*psc1 + atomI.inducedDipoleP[0]*dsc1) -
rr7*sc6*(atomI.inducedDipole[0]*psc2 + atomI.inducedDipoleP[0]*dsc2)) +
(rr3*atomI.q*(atomJ.inducedDipole[0]*psc0+atomJ.inducedDipoleP[0]*dsc0) +
rr5*sc3*(atomJ.inducedDipole[0]*psc1 +atomJ.inducedDipoleP[0]*dsc1) +
rr7*sc5*(atomJ.inducedDipole[0]*psc2 +atomJ.inducedDipoleP[0]*dsc2))*0.5f +
rr5*scaleI1*(sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0] +
sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0])*0.5f +
0.5f*(sci4*psc1+scip4*dsc1)*rr5*atomI.labFrameDipole[0] +
0.5f*(sci3*psc1+scip3*dsc1)*rr5*atomJ.labFrameDipole[0] +
0.5f*gfi4*((temp5[0]-qIuJ[0])*psc1 +
(temp4[0]-qIuJp[0])*dsc1) + gfi5*qIr[0] + gfi6*qJr[0];
float ftm2i_1 = gfi1*deltaR[1] +
0.5f*(-rr3*atomJ.q*(atomI.inducedDipole[1]*psc0 + atomI.inducedDipoleP[1]*dsc0) +
rr5*sc4*(atomI.inducedDipole[1]*psc1 + atomI.inducedDipoleP[1]*dsc1) -
rr7*sc6*(atomI.inducedDipole[1]*psc2 + atomI.inducedDipoleP[1]*dsc2)) +
(rr3*atomI.q*(atomJ.inducedDipole[1]*psc0+atomJ.inducedDipoleP[1]*dsc0) +
rr5*sc3*(atomJ.inducedDipole[1]*psc1 +atomJ.inducedDipoleP[1]*dsc1) +
rr7*sc5*(atomJ.inducedDipole[1]*psc2 +atomJ.inducedDipoleP[1]*dsc2))*0.5f +
rr5*scaleI1*(sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1] +
sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1])*0.5f +
0.5f*(sci4*psc1+scip4*dsc1)*rr5*atomI.labFrameDipole[1] +
0.5f*(sci3*psc1+scip3*dsc1)*rr5*atomJ.labFrameDipole[1] +
0.5f*gfi4*((temp5[1]-qIuJ[1])*psc1 +
(temp4[1]-qIuJp[1])*dsc1) + gfi5*qIr[1] + gfi6*qJr[1];
float ftm2i_2 = gfi1*deltaR[2] +
0.5f*(-rr3*atomJ.q*(atomI.inducedDipole[2]*psc0 + atomI.inducedDipoleP[2]*dsc0) +
rr5*sc4*(atomI.inducedDipole[2]*psc1 + atomI.inducedDipoleP[2]*dsc1) -
rr7*sc6*(atomI.inducedDipole[2]*psc2 + atomI.inducedDipoleP[2]*dsc2)) +
(rr3*atomI.q*(atomJ.inducedDipole[2]*psc0+atomJ.inducedDipoleP[2]*dsc0) +
rr5*sc3*(atomJ.inducedDipole[2]*psc1 +atomJ.inducedDipoleP[2]*dsc1) +
rr7*sc5*(atomJ.inducedDipole[2]*psc2 +atomJ.inducedDipoleP[2]*dsc2))*0.5f +
rr5*scaleI1*(sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2] +
sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2])*0.5f +
0.5f*(sci4*psc1+scip4*dsc1)*rr5*atomI.labFrameDipole[2] +
0.5f*(sci3*psc1+scip3*dsc1)*rr5*atomJ.labFrameDipole[2] +
0.5f*gfi4*((temp5[2]-qIuJ[2])*psc1 +
(temp4[2]-qIuJp[2])*dsc1) + gfi5*qIr[2] + gfi6*qJr[2];
// handle of scaling for partially excluded interactions;
// correction to convert mutual to direct polarization force;
ftm2i_0 -= (fridmp_0 + findmp_0);
ftm2i_1 -= (fridmp_1 + findmp_1);
ftm2i_2 -= (fridmp_2 + findmp_2);
// extra correction applied only when cAmoebaSim.polarizationType is non-zero
if( cAmoebaSim.polarizationType )
{
// NOTE(review): the bare '0.5' literals below are doubles, so these two expressions are
// evaluated in double precision before being narrowed to float.
float gfd = 0.5*(rr5*scip2*scaleI0 - rr7*(scip3*sci4+sci3*scip4)*scaleI1);
// this scalar temp5 shadows the float temp5[3] array declared above for the rest of this block
float temp5 = 0.5*rr5*scaleI1;
float fdir_0 = gfd*deltaR[0] + temp5*(sci4*atomI.inducedDipoleP[0] + scip4*atomI.inducedDipole[0] + sci3*atomJ.inducedDipoleP[0] + scip3*atomJ.inducedDipole[0]);
float fdir_1 = gfd*deltaR[1] + temp5*(sci4*atomI.inducedDipoleP[1] + scip4*atomI.inducedDipole[1] + sci3*atomJ.inducedDipoleP[1] + scip3*atomJ.inducedDipole[1]);
float fdir_2 = gfd*deltaR[2] + temp5*(sci4*atomI.inducedDipoleP[2] + scip4*atomI.inducedDipole[2] + sci3*atomJ.inducedDipoleP[2] + scip3*atomJ.inducedDipole[2]);
ftm2i_0 -= fdir_0 - findmp_0;
ftm2i_1 -= fdir_1 - findmp_1;
ftm2i_2 -= fdir_2 - findmp_2;
}
// now perform the torque calculation;
// intermediate terms for torque between multipoles i and j;
float gti2 = 0.5f*(sci4*psc1+scip4*dsc1)*rr5;
float gti3 = 0.5f*(sci3*psc1+scip3*dsc1)*rr5;
float gti4 = gfi4;
float gti5 = gfi5;
float gti6 = gfi6;
// get the permanent (ttm2, ttm3) and induced interaction torques (ttm2i, ttm3i)
// first set of cross products: the temp1..temp15 scratch vectors are overwritten here
acrossProductVector3(atomI.labFrameDipole, atomJ.labFrameDipole, temp1);
acrossProductVector3(atomI.labFrameDipole, atomJ.inducedDipole , temp2);
acrossProductVector3(atomI.labFrameDipole, atomJ.inducedDipoleP, temp3);
acrossProductVector3(atomI.labFrameDipole, deltaR, temp4);
acrossProductVector3(deltaR, qIuJp, temp5);
acrossProductVector3(deltaR, qIr, temp6);
acrossProductVector3(deltaR, qIuJ, temp7);
acrossProductVector3(atomJ.inducedDipole , qIr, temp8);
acrossProductVector3(atomJ.inducedDipoleP, qIr, temp9);
acrossProductVector3(atomI.labFrameDipole, qJr, temp10);
acrossProductVector3(atomJ.labFrameDipole, qIr, temp11);
acrossProductVector3(deltaR, qIqJr, temp12);
acrossProductVector3(deltaR, qIdJ, temp13);
amatrixCrossProductMatrix3(atomI.labFrameQuadrupole, atomJ.labFrameQuadrupole, temp14);
acrossProductVector3(qJr, qIr, temp15);
float ttm2_0 = -rr3*temp1[0] + gf2*temp4[0]-gf5*temp6[0] + gf4*(temp10[0] + temp11[0] + temp13[0]-2.0f*temp14[0]) - gf7*(temp12[0] + temp15[0]);
float ttm2i_0 = -rr3*(temp2[0]*psc0+temp3[0]*dsc0)*0.5f + gti2*temp4[0] + gti4*((temp8[0]+ temp7[0])*psc1 + (temp9[0] + temp5[0])*dsc1)*0.5f - gti5*temp6[0];
float ttm2_1 = -rr3*temp1[1] + gf2*temp4[1]-gf5*temp6[1] + gf4*(temp10[1] + temp11[1] + temp13[1]-2.0f*temp14[1]) - gf7*(temp12[1] + temp15[1]);
float ttm2i_1 = -rr3*(temp2[1]*psc0+temp3[1]*dsc0)*0.5f + gti2*temp4[1] + gti4*((temp8[1]+ temp7[1])*psc1 + (temp9[1] + temp5[1])*dsc1)*0.5f - gti5*temp6[1];
float ttm2_2 = -rr3*temp1[2] + gf2*temp4[2]-gf5*temp6[2] + gf4*(temp10[2] + temp11[2] + temp13[2]-2.0f*temp14[2]) - gf7*(temp12[2] + temp15[2]);
float ttm2i_2 = -rr3*(temp2[2]*psc0+temp3[2]*dsc0)*0.5f + gti2*temp4[2] + gti4*((temp8[2]+ temp7[2])*psc1 + (temp9[2] + temp5[2])*dsc1)*0.5f - gti5*temp6[2];
// second set of cross products: temp2..temp13 and temp15 are reused;
// temp1 and temp14 keep their values from the first set
acrossProductVector3(atomJ.labFrameDipole, deltaR, temp2 );
acrossProductVector3(deltaR, qJr, temp3 );
acrossProductVector3(atomI.labFrameDipole, qJr, temp4 );
acrossProductVector3(atomJ.labFrameDipole, qIr, temp5 );
acrossProductVector3(deltaR, qJdI, temp6 );
acrossProductVector3(deltaR, qJqIr, temp7 );
acrossProductVector3(qJr, qIr, temp8 ); // _qJrxqIr
acrossProductVector3(atomJ.labFrameDipole, atomI.inducedDipole , temp9 ); // _dJxuI
acrossProductVector3(atomJ.labFrameDipole, atomI.inducedDipoleP, temp10 ); // _dJxuIp
acrossProductVector3(atomI.inducedDipoleP, qJr, temp11 ); // _uIxqJrp
acrossProductVector3(atomI.inducedDipole , qJr, temp12 ); // _uIxqJr
acrossProductVector3(deltaR, qJuIp, temp13 ); // _rxqJuIp
acrossProductVector3(deltaR, qJuI, temp15 ); // _rxqJuI
float ttm3_0 = rr3*temp1[0] + gf3*temp2[0] - gf6*temp3[0] - gf4*(temp4[0] + temp5[0] + temp6[0] - 2.0f*temp14[0]) - gf7*(temp7[0] - temp8[0]);
float ttm3i_0 = -rr3*(temp9[0]*psc0+ temp10[0]*dsc0)*0.5f + gti3*temp2[0] - gti4*((temp12[0] + temp15[0])*psc1 + (temp11[0] + temp13[0])*dsc1)*0.5f - gti6*temp3[0];
float ttm3_1 = rr3*temp1[1] + gf3*temp2[1] - gf6*temp3[1] - gf4*(temp4[1] + temp5[1] + temp6[1] - 2.0f*temp14[1]) - gf7*(temp7[1] - temp8[1]);
float ttm3i_1 = -rr3*(temp9[1]*psc0+ temp10[1]*dsc0)*0.5f + gti3*temp2[1] - gti4*((temp12[1] + temp15[1])*psc1 + (temp11[1] + temp13[1])*dsc1)*0.5f - gti6*temp3[1];
float ttm3_2 = rr3*temp1[2] + gf3*temp2[2] - gf6*temp3[2] - gf4*(temp4[2] + temp5[2] + temp6[2] - 2.0f*temp14[2]) - gf7*(temp7[2] - temp8[2]);
float ttm3i_2 = -rr3*(temp9[2]*psc0+ temp10[2]*dsc0)*0.5f + gti3*temp2[2] - gti4*((temp12[2] + temp15[2])*psc1 + (temp11[2] + temp13[2])*dsc1)*0.5f - gti6*temp3[2];
// apply the m-scale factor to the permanent force and torques (skipped when it is >= 1)
if( scalingFactors[MScaleIndex] < 1.0f ){
ftm2_0 *= scalingFactors[MScaleIndex];
ftm2_1 *= scalingFactors[MScaleIndex];
ftm2_2 *= scalingFactors[MScaleIndex];
ttm2_0 *= scalingFactors[MScaleIndex];
ttm2_1 *= scalingFactors[MScaleIndex];
ttm2_2 *= scalingFactors[MScaleIndex];
ttm3_0 *= scalingFactors[MScaleIndex];
ttm3_1 *= scalingFactors[MScaleIndex];
ttm3_2 *= scalingFactors[MScaleIndex];
}
// results: negated total force, and the two total torques
outputForce->x = -(ftm2_0 + ftm2i_0);
outputForce->y = -(ftm2_1 + ftm2i_1);
outputForce->z = -(ftm2_2 + ftm2i_2);
outputTorque[0].x = (ttm2_0 + ttm2i_0);
outputTorque[0].y = (ttm2_1 + ttm2i_1);
outputTorque[0].z = (ttm2_2 + ttm2i_2);
outputTorque[1].x = (ttm3_0 + ttm3i_0);
outputTorque[1].y = (ttm3_1 + ttm3i_1);
outputTorque[1].z = (ttm3_2 + ttm3i_2);
return;
}
#endif
// Fill *sA with the data of atom atomI read through the constant-memory structs:
// position/charge from cSim.pPosq, multipoles, induced dipoles and the
// damping/thole pair from cAmoebaSim. The force member is not touched.
static __device__ void loadElectrostaticParticle( volatile struct ElectrostaticParticle* sA, unsigned int atomI ){

    // coordinates & charge
    sA->x = cSim.pPosq[atomI].x;
    sA->y = cSim.pPosq[atomI].y;
    sA->z = cSim.pPosq[atomI].z;
    sA->q = cSim.pPosq[atomI].w;

    // the per-atom arrays are stored flat: 3 floats per dipole, 9 per quadrupole
    const unsigned int vectorBase = atomI*3;
    const unsigned int matrixBase = atomI*9;

    // lab dipole
    for( int k = 0; k < 3; k++ ){
        sA->labFrameDipole[k] = cAmoebaSim.pLabFrameDipole[vectorBase + k];
    }

    // lab quadrupole
    for( int k = 0; k < 9; k++ ){
        sA->labFrameQuadrupole[k] = cAmoebaSim.pLabFrameQuadrupole[matrixBase + k];
    }

    // induced dipole
    for( int k = 0; k < 3; k++ ){
        sA->inducedDipole[k] = cAmoebaSim.pInducedDipole[vectorBase + k];
    }

    // induced dipole polar
    for( int k = 0; k < 3; k++ ){
        sA->inducedDipoleP[k] = cAmoebaSim.pInducedDipolePolar[vectorBase + k];
    }

    // damping factor is stored in .x, thole in .y
    sA->damp  = cAmoebaSim.pDampingFactorAndThole[atomI].x;
    sA->thole = cAmoebaSim.pDampingFactorAndThole[atomI].y;
}
// Reset the three force components of *sA to zero; no other member is modified.
static __device__ void zeroElectrostaticParticle( volatile struct ElectrostaticParticle* sA ){
    for( int component = 0; component < 3; component++ ){
        sA->force[component] = 0.0f;
    }
}
// The same header is included several times below. Before each inclusion,
// SUB_METHOD_NAME is defined to paste a variant tag (F1, T1, T3) between the two
// halves of a function name, and the tag itself is defined as an (empty) macro so
// that the header can select code with #ifdef. Each inclusion is therefore expected
// to emit one function whose name contains the tag, e.g. the
// calculateElectrostaticPairIxnF1_kernel() called further down in this file.
// The directives are order-sensitive: every tag is undefined again right after its
// inclusion so that it cannot leak into the next one.

// --- variant F1 (called below with an energy pointer and a 3-float force output)
#undef SUB_METHOD_NAME
#undef F1
#define SUB_METHOD_NAME(a, b) a##F1##b
#define F1
#include "kCalculateAmoebaCudaElectrostatic_b.h"
#undef F1
#undef SUB_METHOD_NAME
// --- variant F2: disabled. The include is commented out, so this stanza only
//     defines and undefines the macros and generates no code.
#undef SUB_METHOD_NAME
#undef F2
#define SUB_METHOD_NAME(a, b) a##F2##b
#define F2
//#include "kCalculateAmoebaCudaElectrostatic_b.h"
#undef F2
#undef SUB_METHOD_NAME
// --- variant T1 (its result is stored in outputTorque[0] by the wrapper below)
#undef SUB_METHOD_NAME
#undef T1
#define SUB_METHOD_NAME(a, b) a##T1##b
#define T1
#include "kCalculateAmoebaCudaElectrostatic_b.h"
#undef T1
#undef SUB_METHOD_NAME
// --- variant T3 (its result is stored in outputTorque[1] by the wrapper below)
#undef SUB_METHOD_NAME
#undef T3
#define SUB_METHOD_NAME(a, b) a##T3##b
#define T3
#include "kCalculateAmoebaCudaElectrostatic_b.h"
#undef T3
#undef SUB_METHOD_NAME
/**---------------------------------------------------------------------------------------

   Evaluate one I-J electrostatic pair and pack the results into float4 outputs

   @param atomI            particle record for atom I
   @param atomJ            particle record for atom J
   @param scalingFactors   scale factors forwarded unchanged to the per-variant routines
   @param outputForce      x/y/z receive the F1 force, w receives the F1 energy
   @param outputTorque     [0].xyz receive the T1 result, [1].xyz receive the T3 result;
                           the w components are left untouched
   @param forceFactor      not used by this routine

   When 'Orig' is defined the call is forwarded to the five-argument overload instead.

   --------------------------------------------------------------------------------------- */
__device__ void calculateElectrostaticPairIxn_kernel( ElectrostaticParticle& atomI, ElectrostaticParticle& atomJ,
                                                      float* scalingFactors, float4* outputForce, float4 outputTorque[2], float forceFactor){
#ifdef Orig
    return calculateElectrostaticPairIxn_kernel( atomI, atomJ, scalingFactors, outputForce, outputTorque);
#else
    float pairEnergy;
    float pairForce[3];
    float torqueFirst[3];
    float torqueSecond[3];

    // force and energy
    calculateElectrostaticPairIxnF1_kernel( atomI, atomJ, scalingFactors, &pairEnergy, pairForce);
    outputForce->x = pairForce[0];
    outputForce->y = pairForce[1];
    outputForce->z = pairForce[2];
    outputForce->w = pairEnergy;

    // first torque slot
    calculateElectrostaticPairIxnT1_kernel( atomI, atomJ, scalingFactors, torqueFirst);
    outputTorque[0].x = torqueFirst[0];
    outputTorque[0].y = torqueFirst[1];
    outputTorque[0].z = torqueFirst[2];

    // second torque slot
    calculateElectrostaticPairIxnT3_kernel( atomI, atomJ, scalingFactors, torqueSecond);
    outputTorque[1].x = torqueSecond[0];
    outputTorque[1].y = torqueSecond[1];
    outputTorque[1].z = torqueSecond[2];

    return;
#endif
}
// Include versions of the kernels for N^2 calculations.
// The kernel header is included twice; METHOD_NAME pastes a tag into the kernel name
// and USE_OUTPUT_BUFFER_PER_WARP selects the output-buffer addressing in the header.

// First inclusion: USE_OUTPUT_BUFFER_PER_WARP undefined, kernel names contain "N2".
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaElectrostatic.h"
// Second inclusion: USE_OUTPUT_BUFFER_PER_WARP defined, kernel names contain "N2ByWarp".
// Note that USE_OUTPUT_BUFFER_PER_WARP and METHOD_NAME stay defined after this point.
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaElectrostatic.h"
/**---------------------------------------------------------------------------------------

   Reduce the per-buffer torque contributions in psWorkArray_3_1 into psTorque

   @param amoebaGpu        amoebaGpu context

   Launches kReduceFields_kernel over 3*paddedNumberOfAtoms values and
   gpu->sim.outputBuffers buffers, then checks the launch with LAUNCHERROR.

   --------------------------------------------------------------------------------------- */
static void kReduceTorque(amoebaGpuContext amoebaGpu ){

    gpuContext gpu = amoebaGpu->gpuContext;

    kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
                               gpu->sim.paddedNumberOfAtoms*3,
                               gpu->sim.outputBuffers,
                               amoebaGpu->psWorkArray_3_1->_pDevData,
                               amoebaGpu->psTorque->_pDevData,
                               0 );

    LAUNCHERROR("kReduceElectrostaticTorque");
}
/**---------------------------------------------------------------------------------------

   Compute Amoeba electrostatic force & torque

   @param amoebaGpu        amoebaGpu context
   @param addTorqueToForce if nonzero, reduce the torque buffers and map the torque
                           onto the force array after the pair kernel has been launched

   The block size is chosen on the first call only and cached in a function-local
   static, so it is shared by every later call regardless of the context passed in.

   --------------------------------------------------------------------------------------- */
void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueToForce ){

    gpuContext gpu = amoebaGpu->gpuContext;

    // on first pass, set threads/block: capped per architecture and by what
    // getThreadsPerBlock() reports for one ElectrostaticParticle per thread
    static unsigned int threadsPerBlock = 0;
    if( threadsPerBlock == 0 ){
        unsigned int maxThreads = 64;
        if( gpu->sm_version >= SM_20 ){
            maxThreads = 512;
        } else if( gpu->sm_version >= SM_12 ){
            maxThreads = 128;
        }
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(ElectrostaticParticle), gpu->sharedMemoryPerBlock), maxThreads);
    }

    // clear the work array that receives the torque contributions
    kClearFields_3( amoebaGpu, 1 );

    // one ElectrostaticParticle of dynamic shared memory per thread
    const size_t sharedBytes = sizeof(ElectrostaticParticle)*threadsPerBlock;
    unsigned int* workUnits  = gpu->psWorkUnit->_pDevData;
    float* torqueBuffers     = amoebaGpu->psWorkArray_3_1->_pDevData;

    if( gpu->bOutputBufferPerWarp ){
        kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sharedBytes>>>(
                                                                 workUnits, torqueBuffers );
    } else {
        kCalculateAmoebaCudaElectrostaticN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sharedBytes>>>(
                                                           workUnits, torqueBuffers );
    }
    LAUNCHERROR("kCalculateAmoebaCudaElectrostaticN2Forces");

    if( !addTorqueToForce ){
        return;
    }

    kReduceTorque( amoebaGpu );
    cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpu, amoebaGpu->psTorque );
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
/**---------------------------------------------------------------------------------------

   Electrostatic force, torque and energy over a list of GRID x GRID atom tiles

   @param workUnit       encoded tile list; each entry is decoded by decodeCell() into the
                         first atom index of the tile row (x), of the tile column (y) and an
                         exclusion flag
   @param outputTorque   torque output buffers, three floats per atom per buffer

   Launch requirements visible from the code:
     - dynamic shared memory holding one ElectrostaticParticle per thread of the block
       (sA is indexed with threadIdx.x)
     - the tiles are split evenly over groups of GRID consecutive threads; each group
       walks its own [pos, end) slice of the tile list

   Per tile, each thread owns atom x + tgx (kept in registers as localParticle) and the
   group shares the GRID atoms of row y through sA. Forces are added to cSim.pForce4,
   torques go to outputTorque and the energy is accumulated into cSim.pEnergy, all scaled
   by cAmoebaSim.electric/cAmoebaSim.dielec.

   With USE_OUTPUT_BUFFER_PER_WARP the output buffer is selected by the warp index and
   torques are written with add3dArray(); otherwise the buffer is selected by the tile
   index of the partner block and torques are written with load3dArray()
   (presumably accumulate vs. overwrite -- confirm in the utilities header).

   NOTE(review): lanes read records written by other lanes of the same group (psA[j],
   psA[tj]) without any explicit barrier; this relies on sA being volatile and on the
   GRID threads executing in lockstep -- confirm for the targeted architectures.

   --------------------------------------------------------------------------------------- */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(512, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(128, 1)
#else
__launch_bounds__(64, 1)
#endif
void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
                            unsigned int* workUnit, float* outputTorque){

    extern __shared__ volatile ElectrostaticParticle sA[];

    unsigned int totalWarps        = gridDim.x*blockDim.x/GRID;
    unsigned int warp              = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits      = cSim.pInteractionCount[0];
    unsigned int pos               = warp*numWorkUnits/totalWarps;
    unsigned int end               = (warp+1)*numWorkUnits/totalWarps;

    // first atom index of the row currently held in this group's slice of sA
    // (0xFFFFFFFF == nothing loaded yet)
    unsigned int lasty             = 0xFFFFFFFF;

    float totalEnergy              = 0.0f;
    float conversionFactor         = (cAmoebaSim.electric/cAmoebaSim.dielec);
    float scalingFactors[LastScalingIndex];

    while (pos < end)
    {
        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // Extract cell coordinates
        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx           = threadIdx.x & (GRID - 1);   // lane within the group
        unsigned int tbx           = threadIdx.x - tgx;          // first thread of the group
        unsigned int tj            = tgx;

        volatile ElectrostaticParticle* psA = &sA[tbx];
        unsigned int atomI         = x + tgx;
        ElectrostaticParticle localParticle;
        loadElectrostaticParticle( &localParticle, atomI );
        zeroElectrostaticParticle( &localParticle );

        // default: no scaling; overwritten per pair only where masks apply
        scalingFactors[PScaleIndex] = 1.0f;
        scalingFactors[DScaleIndex] = 1.0f;
        scalingFactors[UScaleIndex] = 1.0f;
        scalingFactors[MScaleIndex] = 1.0f;

        if (x == y) // Handle diagonals uniquely at 50% efficiency
        {
            // load shared data (x == y, so this is also the row-y record for this lane)
            loadElectrostaticParticle( &(sA[threadIdx.x]), atomI );

            unsigned int xi        = x >> GRIDBITS;
            unsigned int cell      = xi + xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
            int  dScaleMask        = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            int2 pScaleMask        = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            int2 mScaleMask        = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

            // force/energy pass: every thread visits all partners of its own atom, so each
            // pair is evaluated by both of its atoms and only half the energy is counted
            for (unsigned int j = 0; j < GRID; j++)
            {
                unsigned int atomJ = y + j;
                if( (atomI != atomJ) && (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){

                    getMaskedDScaleFactor( j, dScaleMask, scalingFactors + DScaleIndex );
                    getMaskedPScaleFactor( j, pScaleMask, scalingFactors + PScaleIndex );
                    getMaskedMScaleFactor( j, mScaleMask, scalingFactors + MScaleIndex );

                    float force[3];
                    float energy;
                    calculateElectrostaticPairIxnF1_kernel( localParticle, psA[j], scalingFactors, &energy, force);

                    localParticle.force[0] += force[0];
                    localParticle.force[1] += force[1];
                    localParticle.force[2] += force[2];

                    totalEnergy            += 0.5f*energy;
                }
            }

            // Write results
            localParticle.force[0] *= conversionFactor;
            localParticle.force[1] *= conversionFactor;
            localParticle.force[2] *= conversionFactor;

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = (x + tgx + warp*cSim.paddedNumberOfAtoms);
#else
            unsigned int offset = (x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
#endif
            add3dArrayToFloat4( offset, localParticle.force, cSim.pForce4 );

            // torque pass: reuse the force slot of localParticle as the torque accumulator
            zeroElectrostaticParticle( &localParticle );
            for (unsigned int j = 0; j < GRID; j++)
            {
                unsigned int atomJ = y + j;
                if( (atomI != atomJ) && (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){

                    getMaskedDScaleFactor( j, dScaleMask, scalingFactors + DScaleIndex );
                    getMaskedPScaleFactor( j, pScaleMask, scalingFactors + PScaleIndex );
                    getMaskedMScaleFactor( j, mScaleMask, scalingFactors + MScaleIndex );

                    float force[3];
                    calculateElectrostaticPairIxnT1_kernel( localParticle, psA[j], scalingFactors, force);

                    localParticle.force[0] += force[0];
                    localParticle.force[1] += force[1];
                    localParticle.force[2] += force[2];
                }
            }

            localParticle.force[0] *= conversionFactor;
            localParticle.force[1] *= conversionFactor;
            localParticle.force[2] *= conversionFactor;

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            offset = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArray( 3*offset, localParticle.force, outputTorque );
#else
            offset = (x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( 3*offset, localParticle.force, outputTorque );
#endif

        } else {

            // Read fixed atom data into registers and GRF
            // (skipped when sA already holds the atoms of row y)
            if( lasty != y ){
                loadElectrostaticParticle( &(sA[threadIdx.x]), (y+tgx) );
            }

            // the shared record's force slot accumulates the reaction on the row-y atoms
            zeroElectrostaticParticle( &(sA[threadIdx.x]) );

            // masks are only read (and only used) when the tile carries exclusions
            int  dScaleMask;
            int2 pScaleMask;
            int2 mScaleMask;
            if( bExclusionFlag ){
                unsigned int xi   = x >> GRIDBITS;
                unsigned int yi   = y >> GRIDBITS;
                unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
                dScaleMask        = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                pScaleMask        = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                mScaleMask        = cAmoebaSim.pM_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
            }

            // force/energy pass: tj starts at the lane index and rotates, so at each
            // step every lane addresses a different shared record
            for (unsigned int j = 0; j < GRID; j++){

                unsigned int atomJ = y + tj;
                if( (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){

                    if( bExclusionFlag ){
                        getMaskedDScaleFactor( tj, dScaleMask, scalingFactors + DScaleIndex );
                        getMaskedPScaleFactor( tj, pScaleMask, scalingFactors + PScaleIndex );
                        getMaskedMScaleFactor( tj, mScaleMask, scalingFactors + MScaleIndex );
                    }

                    float force[3];
                    float energy;
                    calculateElectrostaticPairIxnF1_kernel( localParticle, psA[tj], scalingFactors, &energy, force);

                    totalEnergy            += energy;

                    // equal and opposite contributions on the two atoms
                    localParticle.force[0] += force[0];
                    localParticle.force[1] += force[1];
                    localParticle.force[2] += force[2];

                    psA[tj].force[0]       -= force[0];
                    psA[tj].force[1]       -= force[1];
                    psA[tj].force[2]       -= force[2];
                }
                tj = (tj + 1) & (GRID - 1);
            }

            // Write results
            localParticle.force[0]  *= conversionFactor;
            localParticle.force[1]  *= conversionFactor;
            localParticle.force[2]  *= conversionFactor;

            sA[threadIdx.x].force[0] *= conversionFactor;
            sA[threadIdx.x].force[1] *= conversionFactor;
            sA[threadIdx.x].force[2] *= conversionFactor;

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, localParticle.force, cSim.pForce4 );

            offset = (y + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, sA[threadIdx.x].force, cSim.pForce4 );
#else
            unsigned int offset = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, localParticle.force, cSim.pForce4 );

            offset = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            add3dArrayToFloat4( offset, sA[threadIdx.x].force, cSim.pForce4 );
#endif

            // torque pass: both force slots are reused as torque accumulators
            zeroElectrostaticParticle( &(sA[threadIdx.x]) );
            zeroElectrostaticParticle( &localParticle );
            tj = tgx;
            for (unsigned int j = 0; j < GRID; j++){

                unsigned int atomJ = y + tj;
                if( (atomI < cSim.atoms) && (atomJ < cSim.atoms) ){

                    if( bExclusionFlag ){
                        getMaskedDScaleFactor( tj, dScaleMask, scalingFactors + DScaleIndex );
                        getMaskedPScaleFactor( tj, pScaleMask, scalingFactors + PScaleIndex );
                        getMaskedMScaleFactor( tj, mScaleMask, scalingFactors + MScaleIndex );
                    }

                    float force[3];

                    // T1 result is accumulated for the thread's own atom
                    calculateElectrostaticPairIxnT1_kernel( localParticle, psA[tj], scalingFactors, force);
                    localParticle.force[0] += force[0];
                    localParticle.force[1] += force[1];
                    localParticle.force[2] += force[2];

                    // T3 result is accumulated for the shared (row-y) atom
                    calculateElectrostaticPairIxnT3_kernel( localParticle, psA[tj], scalingFactors, force);
                    psA[tj].force[0]       += force[0];
                    psA[tj].force[1]       += force[1];
                    psA[tj].force[2]       += force[2];
                }
                tj = (tj + 1) & (GRID - 1);
            }

            localParticle.force[0]  *= conversionFactor;
            localParticle.force[1]  *= conversionFactor;
            localParticle.force[2]  *= conversionFactor;

            sA[threadIdx.x].force[0] *= conversionFactor;
            sA[threadIdx.x].force[1] *= conversionFactor;
            sA[threadIdx.x].force[2] *= conversionFactor;

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            offset = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArray( 3*offset, localParticle.force, outputTorque );

            offset = (y + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArray( 3*offset, sA[threadIdx.x].force, outputTorque );
#else
            offset = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( 3*offset, localParticle.force, outputTorque );

            offset = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( 3*offset, sA[threadIdx.x].force, outputTorque );
#endif
        }

        // In both branches sA now holds the atoms of row y (on the diagonal x == y).
        // Recording this after either branch keeps the 'lasty != y' reload test exact:
        // if only the off-diagonal branch updated lasty, a diagonal tile would replace
        // the shared records while lasty still named an older row, and a later
        // off-diagonal tile for that older row would skip the reload and use the
        // wrong atoms.
        lasty = y;

        pos++;
    }

    // one energy slot per thread
    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += (conversionFactor*totalEnergy);
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
#include "openmm/OpenMMException.h"
#include <stdio.h>
#include <cuda.h>
#include <cstdlib>
using namespace std;
// Single-precision square root alias.
#define SQRT sqrtf
// File-local constant-memory copies of the two simulation parameter structs.
// They are written by SetCalculateAmoebaMultipolePotentialSim() and read back by
// GetCalculateAmoebaMultipolePotentialSim() below; the device code in this file reads
// its parameters through them.
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
// Declaration only; the kernel is not defined or launched in the visible part of this file.
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
/**---------------------------------------------------------------------------------------

   Upload the host-side simulation parameter structs to this file's constant memory

   @param amoebaGpu        amoebaGpu context; gpuContext->sim is copied to cSim and
                           amoebaSim is copied to cAmoebaSim

   Each copy is checked with RTERROR.

   --------------------------------------------------------------------------------------- */
void SetCalculateAmoebaMultipolePotentialSim(amoebaGpuContext amoebaGpu)
{
    // base simulation parameters
    cudaError_t status = cudaMemcpyToSymbol(cSim, &amoebaGpu->gpuContext->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaMultipolePotentialSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");

    // Amoeba-specific parameters
    status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaMultipolePotentialSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
/**---------------------------------------------------------------------------------------

   Download this file's constant-memory parameter structs back into the host context

   @param amoebaGpu        amoebaGpu context; cSim is copied into gpuContext->sim and
                           cAmoebaSim is copied into amoebaSim

   Each copy is checked with RTERROR.

   --------------------------------------------------------------------------------------- */
void GetCalculateAmoebaMultipolePotentialSim(amoebaGpuContext amoebaGpu)
{
    // base simulation parameters
    cudaError_t status = cudaMemcpyFromSymbol(&amoebaGpu->gpuContext->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaMultipolePotentialSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");

    // Amoeba-specific parameters
    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaMultipolePotentialSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
// Per-atom data needed to evaluate the electrostatic potential at a grid point.
// The kernels keep one record per thread in dynamic shared memory and size that
// memory with sizeof(ElectrostaticPotentialParticle), so the member list below
// directly determines the shared-memory footprint.
struct ElectrostaticPotentialParticle {
// atom coordinates (x, y, z) and charge (q)
float x;
float y;
float z;
float q;
// lab frame dipole (3 components)
float labFrameDipole[3];
// lab frame quadrupole (9 entries, loaded from 9 consecutive floats per atom)
float labFrameQuadrupole[9];
// induced dipole (3 components)
float inducedDipole[3];
};
/**---------------------------------------------------------------------------------------

   Load data for particle w/ index=atomI

   @param sA       address to store atomI's coordinates, charge and multipole moments
   @param atomI    index of atom whose data is to be stored

   --------------------------------------------------------------------------------------- */
static __device__ void loadElectrostaticPotentialParticle( volatile struct ElectrostaticPotentialParticle* sA, unsigned int atomI ){

    // starting offsets of this atom in the 3-per-atom and 9-per-atom arrays
    const unsigned int vectorBase = atomI*3;
    const unsigned int tensorBase = atomI*9;

    // coordinates & charge
    sA->x = cSim.pPosq[atomI].x;
    sA->y = cSim.pPosq[atomI].y;
    sA->z = cSim.pPosq[atomI].z;
    sA->q = cSim.pPosq[atomI].w;

    // lab dipole
    for( unsigned int ii = 0; ii < 3; ii++ ){
        sA->labFrameDipole[ii] = cAmoebaSim.pLabFrameDipole[vectorBase + ii];
    }

    // lab quadrupole
    for( unsigned int ii = 0; ii < 9; ii++ ){
        sA->labFrameQuadrupole[ii] = cAmoebaSim.pLabFrameQuadrupole[tensorBase + ii];
    }

    // induced dipole
    for( unsigned int ii = 0; ii < 3; ii++ ){
        sA->inducedDipole[ii] = cAmoebaSim.pInducedDipole[vectorBase + ii];
    }
}
/**---------------------------------------------------------------------------------------

   Calculate potential at grid point due atomI

   Code adapted from TINKER routine potpoint in potpoint.f

   @param atomI            atomI's coordinates and multipole moments
   @param gridPoint        grid coordinates
   @param potential        output potential (overwritten, not accumulated)

   The atom - grid point separation is wrapped with the periodic box sizes in cSim
   before use. The result is not yet scaled by any unit conversion factor.

   --------------------------------------------------------------------------------------- */
__device__ void calculateElectrostaticPotentialForAtomGridPoint_kernel( volatile ElectrostaticPotentialParticle& atomI, volatile float4& gridPoint, float* potential ){

    // separation vector from the grid point to the atom
    float xr = atomI.x - gridPoint.x;
    float yr = atomI.y - gridPoint.y;
    float zr = atomI.z - gridPoint.z;

    // wrap each component with the periodic box
    xr -= floorf(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
    yr -= floorf(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
    zr -= floorf(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;

    // inverse-distance powers
    const float invR  = 1.0f/sqrtf( xr*xr + yr*yr + zr*zr );
    const float invR2 = invR*invR;
    const float invR3 = invR*invR2;
    const float invR5 = 3.0f*invR3*invR2;

    // charge term
    float phi = atomI.q*invR;

    // permanent + induced dipole term
    const float scd = atomI.labFrameDipole[0]*xr + atomI.labFrameDipole[1]*yr + atomI.labFrameDipole[2]*zr;
    const float scu = atomI.inducedDipole[0]*xr + atomI.inducedDipole[1]*yr + atomI.inducedDipole[2]*zr;
    phi -= (scd + scu)*invR3;

    // quadrupole term; only entries 0, 1, 2, 4, 5 and 8 of the 9-entry array are read
    float scq = xr*(atomI.labFrameQuadrupole[0]*xr + atomI.labFrameQuadrupole[1]*yr + atomI.labFrameQuadrupole[2]*zr);
    scq += yr*(atomI.labFrameQuadrupole[1]*xr + atomI.labFrameQuadrupole[4]*yr + atomI.labFrameQuadrupole[5]*zr);
    scq += zr*(atomI.labFrameQuadrupole[2]*xr + atomI.labFrameQuadrupole[5]*yr + atomI.labFrameQuadrupole[8]*zr);
    phi += scq*invR5;

    *potential = phi;
}
// Include versions of the kernels for N x PotentialGridSize calculations.
// The kernel header is included twice; METHOD_NAME pastes a tag into the kernel name
// and USE_OUTPUT_BUFFER_PER_WARP selects how the header addresses the output buffers.

// First inclusion: USE_OUTPUT_BUFFER_PER_WARP undefined, kernel name contains "NxG".
#undef USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##NxG##b
#include "kCalculateAmoebaCudaElectrostaticPotential.h"
// Second inclusion: USE_OUTPUT_BUFFER_PER_WARP defined, kernel name contains "NxGByWarp".
// Both macros stay defined after this point.
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##NxGByWarp##b
#include "kCalculateAmoebaCudaElectrostaticPotential.h"
/**---------------------------------------------------------------------------------------

   Kernel to reduce potential

   For every index below cAmoebaSim.paddedPotentialGridSize, sums the values stored at
   that index in each of the cSim.outputBuffers buffers of cAmoebaSim.pPotential
   (buffers are paddedPotentialGridSize floats apart), scales the sum by
   cAmoebaSim.electric/cAmoebaSim.dielec and stores it back into the first buffer.

   Each thread starts at its global thread index and advances by the total number of
   launched threads, so any launch configuration covers all indices.

   --------------------------------------------------------------------------------------- */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReducePotential_kernel()
{
    const float conversionFactor = (cAmoebaSim.electric/cAmoebaSim.dielec);

    for( unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
         pos < cAmoebaSim.paddedPotentialGridSize;
         pos += gridDim.x*blockDim.x ){

        float* cursor   = cAmoebaSim.pPotential + pos;
        float sum       = 0.0f;
        int   remaining = cSim.outputBuffers;

        // four buffers at a time
        for( ; remaining >= 4; remaining -= 4 ){
            float f1 = *cursor;
            cursor  += cAmoebaSim.paddedPotentialGridSize;
            float f2 = *cursor;
            cursor  += cAmoebaSim.paddedPotentialGridSize;
            float f3 = *cursor;
            cursor  += cAmoebaSim.paddedPotentialGridSize;
            float f4 = *cursor;
            cursor  += cAmoebaSim.paddedPotentialGridSize;
            sum     += f1 + f2 + f3 + f4;
        }

        // then a remaining pair, if any
        if( remaining >= 2 ){
            float f1 = *cursor;
            cursor  += cAmoebaSim.paddedPotentialGridSize;
            float f2 = *cursor;
            cursor  += cAmoebaSim.paddedPotentialGridSize;
            sum     += f1 + f2;
            remaining -= 2;
        }

        // then a final single buffer, if any
        if( remaining > 0 ){
            sum += *cursor;
        }

        // the scaled total replaces the entry of the first buffer
        cAmoebaSim.pPotential[pos] = sum*conversionFactor;
    }
}
/**---------------------------------------------------------------------------------------

   Reduce Amoeba electrostatic potential

   @param gpu        gpu context; supplies the grid size (sim.blocks) and the block size
                     (sim.bsf_reduce_threads_per_block) for the reduction kernel

   The launch is checked with LAUNCHERROR.

   --------------------------------------------------------------------------------------- */
void kReducePotential(gpuContext gpu)
{
    kReducePotential_kernel<<<gpu->sim.blocks,
                              gpu->sim.bsf_reduce_threads_per_block>>>();

    LAUNCHERROR("kReducePotential");
}
/**---------------------------------------------------------------------------------------

   Compute Amoeba electrostatic potential

   @param amoebaGpu        amoebaGpu context

   Launches the atom x grid-point potential kernel (per-warp or per-block output
   buffer variant, depending on gpu->bOutputBufferPerWarp) and then reduces the
   output buffers with kReducePotential().

   The block size is chosen on the first call only and cached in a function-local static.

   NOTE(review): nothing in this function clears the potential output buffers before
   the launch -- confirm they are zeroed elsewhere if the selected kernel variant
   accumulates into them.

   --------------------------------------------------------------------------------------- */
void cudaComputeAmoebaElectrostaticPotential( amoebaGpuContext amoebaGpu ){

    gpuContext gpu = amoebaGpu->gpuContext;

    // on first pass, set threads/block: capped per architecture and by what
    // getThreadsPerBlock() reports for one ElectrostaticPotentialParticle per thread
    static unsigned int threadsPerBlock = 0;
    if( threadsPerBlock == 0 ){
        unsigned int maxThreads = 64;
        if( gpu->sm_version >= SM_20 ){
            maxThreads = 512;
        } else if( gpu->sm_version >= SM_12 ){
            maxThreads = 128;
        }
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(ElectrostaticPotentialParticle), gpu->sharedMemoryPerBlock), maxThreads);
    }

    // one ElectrostaticPotentialParticle of dynamic shared memory per thread
    const size_t sharedBytes = sizeof(ElectrostaticPotentialParticle)*threadsPerBlock;

    if( gpu->bOutputBufferPerWarp ){
        kCalculateAmoebaCudaElectrostaticPotentialNxGByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sharedBytes>>>( );
    } else {
        kCalculateAmoebaCudaElectrostaticPotentialNxG_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sharedBytes>>>( );
    }
    LAUNCHERROR("kCalculateAmoebaCudaElectrostaticPotential");

    // sum the output buffers and apply the unit conversion
    kReducePotential( gpu );
}
/**---------------------------------------------------------------------------------------

   Entry point for the Amoeba multipole electrostatic potential calculation

   @param amoebaGpu        amoebaGpu context

   Runs the multipole setup (second argument 'false') and then the electrostatic
   potential computation.

   --------------------------------------------------------------------------------------- */
void kCalculateAmoebaMultipolePotential(amoebaGpuContext amoebaGpu )
{
    // step 1: multipole setup
    kSetupAmoebaMultipoleForces(amoebaGpu, false );

    // step 2: potential at the grid points
    cudaComputeAmoebaElectrostaticPotential( amoebaGpu );
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
/**---------------------------------------------------------------------------------------

   Electrostatic potential at grid points due to the atoms, over GRID x GRID tiles

   Tiles are read from cAmoebaSim.pPotentialWorkUnit and decoded by decodeCell() into
   the first grid-point index (x) and the first atom index (y) of the tile. The tiles
   are split evenly over groups of GRID consecutive threads.

   Launch requirement visible from the code: dynamic shared memory holding one
   ElectrostaticPotentialParticle per thread of the block.

   Per tile, each thread owns grid point x + lane, loads atom y + lane into shared
   memory and sums the potential of the GRID atoms of the tile at its grid point.
   Contributions are only counted when the atom index is below cSim.atoms and the
   grid-point index is below cAmoebaSim.potentialGridSize.

   With USE_OUTPUT_BUFFER_PER_WARP the sum is added to the buffer selected by the warp
   index; otherwise it is stored (overwriting) in the buffer selected by the atom-tile
   index. The values are not yet scaled by any unit conversion factor.

   NOTE(review): lanes read shared records written by other lanes without an explicit
   barrier; this relies on the volatile qualifier and lockstep execution of the GRID
   threads -- confirm for the targeted architectures.

   --------------------------------------------------------------------------------------- */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(512, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(128, 1)
#else
__launch_bounds__(64, 1)
#endif
void METHOD_NAME(kCalculateAmoebaCudaElectrostaticPotential, _kernel)( void ){

    extern __shared__ volatile ElectrostaticPotentialParticle sAPotential[];

    unsigned int* workUnit          = cAmoebaSim.pPotentialWorkUnit;

    // slice of the tile list handled by this group of GRID threads
    const unsigned int totalWarps   = gridDim.x*blockDim.x/GRID;
    const unsigned int warp         = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    const unsigned int numWorkUnits = cAmoebaSim.potentialWorkUnits;
    const unsigned int firstUnit    = warp*numWorkUnits/totalWarps;
    const unsigned int lastUnit     = (warp+1)*numWorkUnits/totalWarps;

    // lane within the group and the group's slice of the shared records
    const unsigned int lane         = threadIdx.x & (GRID - 1);
    volatile ElectrostaticPotentialParticle* groupParticles = &sAPotential[threadIdx.x - lane];

    for( unsigned int unit = firstUnit; unit < lastUnit; unit++ ){

        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // Extract cell coordinates
        decodeCell( workUnit[unit], &x, &y, &bExclusionFlag );

        const unsigned int gridPointIndex = x + lane;

        // load particle info for this lane's atom of the tile
        loadElectrostaticPotentialParticle( &(sAPotential[threadIdx.x]), y + lane );

        // visit the GRID atoms of the tile, starting at this lane's own record
        float tileSum   = 0.0f;
        unsigned int tj = lane;
        for( unsigned int step = 0; step < GRID; step++ ){

            float pairPotential;
            calculateElectrostaticPotentialForAtomGridPoint_kernel( groupParticles[tj], cAmoebaSim.pPotentialGrid[gridPointIndex], &pairPotential );

            // padding atoms / padding grid points are evaluated but not counted
            if( (y + tj) < cSim.atoms && gridPointIndex < cAmoebaSim.potentialGridSize ){
                tileSum += pairPotential;
            }

            tj = (tj + 1) & (GRID - 1);
        }

        // Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
        const unsigned int offset = (x + lane + warp*cAmoebaSim.paddedPotentialGridSize);
        cAmoebaSim.pPotential[offset] += tileSum;
#else
        const unsigned int offset = (x + lane + (y >> GRIDBITS)*cAmoebaSim.paddedPotentialGridSize);
        cAmoebaSim.pPotential[offset] = tileSum;
#endif
    }
}
__device__ void SUB_METHOD_NAME( calculateElectrostaticPairIxn, _kernel )( ElectrostaticParticle& atomI, volatile ElectrostaticParticle& atomJ,
float* scalingFactors,
#ifdef F1
float* energy,
#endif
float* outputForce ){
#ifdef F1
float ddsc3_0 = 0.0f;
float ddsc3_1 = 0.0f;
float ddsc3_2 = 0.0f;
float ddsc5_0 = 0.0f;
float ddsc5_1 = 0.0f;
float ddsc5_2 = 0.0f;
float ddsc7_0 = 0.0f;
float ddsc7_1 = 0.0f;
float ddsc7_2 = 0.0f;
#endif
float xr = atomJ.x - atomI.x;
float yr = atomJ.y - atomI.y;
float zr = atomJ.z - atomI.z;
float r2 = xr*xr + yr*yr + zr*zr;
float r = sqrtf( r2 );
float rr1 = 1.0f/r;
float rr2 = rr1*rr1;
float rr3 = rr1*rr2;
float rr5 = 3.0f*rr3*rr2;
float rr7 = 5.0f*rr5*rr2;
float rr9 = 7.0f*rr7*rr2;
#ifdef F1
float rr11 = 9.0f*rr9*rr2;
#endif
float scale3 = 1.0f;
float scale5 = 1.0f;
float scale7 = 1.0f;
float pdamp = atomI.damp*atomJ.damp;
if( pdamp != 0.0 && r < cAmoebaSim.scalingDistanceCutoff ){
float ratio = r/pdamp;
float pGamma = atomJ.thole > atomI.thole ? atomI.thole : atomJ.thole;
float damp = ratio*ratio*ratio*pGamma;
float dampExp = expf( -damp );
float damp1 = damp + 1.0f;
float damp2 = damp*damp;
scale3 = 1.0f - dampExp;
scale5 = 1.0f - damp1*dampExp;
scale7 = 1.0f - ( damp1 + 0.6f*damp2)*dampExp;
#ifdef F1
float factor = 3.0f*damp*dampExp*rr2;
float factor7 = -0.2f + 0.6f*damp;
ddsc3_0 = factor*xr;
ddsc5_0 = ddsc3_0*damp;
ddsc7_0 = ddsc5_0*factor7;
ddsc3_1 = factor*yr;
ddsc5_1 = ddsc3_1*damp;
ddsc7_1 = ddsc5_1*factor7;
ddsc3_2 = factor*zr;
ddsc5_2 = ddsc3_2*damp;
ddsc7_2 = ddsc5_2*factor7;
#endif
}
#if defined F1
float scale3i = rr3*scale3*scalingFactors[UScaleIndex];
float scale5i = rr5*scale5*scalingFactors[UScaleIndex];
#endif
float dsc3 = rr3*scale3*scalingFactors[DScaleIndex];
float psc3 = rr3*scale3*scalingFactors[PScaleIndex];
float dsc5 = rr5*scale5*scalingFactors[DScaleIndex];
float psc5 = rr5*scale5*scalingFactors[PScaleIndex];
float dsc7 = rr7*scale7*scalingFactors[DScaleIndex];
float psc7 = rr7*scale7*scalingFactors[PScaleIndex];
float qJr_0 = atomJ.labFrameQuadrupole[0]*xr + atomJ.labFrameQuadrupole[3]*yr + atomJ.labFrameQuadrupole[6]*zr;
float qJr_1 = atomJ.labFrameQuadrupole[1]*xr + atomJ.labFrameQuadrupole[4]*yr + atomJ.labFrameQuadrupole[7]*zr;
float qJr_2 = atomJ.labFrameQuadrupole[2]*xr + atomJ.labFrameQuadrupole[5]*yr + atomJ.labFrameQuadrupole[8]*zr;
float qIr_0 = atomI.labFrameQuadrupole[0]*xr + atomI.labFrameQuadrupole[3]*yr + atomI.labFrameQuadrupole[6]*zr;
float qIr_1 = atomI.labFrameQuadrupole[1]*xr + atomI.labFrameQuadrupole[4]*yr + atomI.labFrameQuadrupole[7]*zr;
float qIr_2 = atomI.labFrameQuadrupole[2]*xr + atomI.labFrameQuadrupole[5]*yr + atomI.labFrameQuadrupole[8]*zr;
#if defined F1
float sc2 = atomI.labFrameDipole[0]*atomJ.labFrameDipole[0] + atomI.labFrameDipole[1]*atomJ.labFrameDipole[1] + atomI.labFrameDipole[2]*atomJ.labFrameDipole[2];
#endif
#if defined F1 || defined T1
float sc4 = atomJ.labFrameDipole[0]*xr + atomJ.labFrameDipole[1]*yr + atomJ.labFrameDipole[2]*zr;
float sc6 = qJr_0*xr + qJr_1*yr + qJr_2*zr;
#endif
#if defined F1 || defined T3
float sc3 = atomI.labFrameDipole[0]*xr + atomI.labFrameDipole[1]*yr + atomI.labFrameDipole[2]*zr;
float sc5 = qIr_0*xr + qIr_1*yr + qIr_2*zr;
#endif
#if defined F1
float sc7 = qIr_0*atomJ.labFrameDipole[0] + qIr_1*atomJ.labFrameDipole[1] + qIr_2*atomJ.labFrameDipole[2];
float sc8 = qJr_0*atomI.labFrameDipole[0] + qJr_1*atomI.labFrameDipole[1] + qJr_2*atomI.labFrameDipole[2];
float sc9 = qIr_0*qJr_0 + qIr_1*qJr_1 + qIr_2*qJr_2;
float sc10 = atomI.labFrameQuadrupole[0]*atomJ.labFrameQuadrupole[0] + atomI.labFrameQuadrupole[1]*atomJ.labFrameQuadrupole[1] + atomI.labFrameQuadrupole[2]*atomJ.labFrameQuadrupole[2] +
atomI.labFrameQuadrupole[3]*atomJ.labFrameQuadrupole[3] + atomI.labFrameQuadrupole[4]*atomJ.labFrameQuadrupole[4] + atomI.labFrameQuadrupole[5]*atomJ.labFrameQuadrupole[5] +
atomI.labFrameQuadrupole[6]*atomJ.labFrameQuadrupole[6] + atomI.labFrameQuadrupole[7]*atomJ.labFrameQuadrupole[7] + atomI.labFrameQuadrupole[8]*atomJ.labFrameQuadrupole[8];
float sci1 = atomI.inducedDipole[0]*atomJ.labFrameDipole[0] + atomI.inducedDipole[1]*atomJ.labFrameDipole[1] + atomI.inducedDipole[2]*atomJ.labFrameDipole[2] +
atomJ.inducedDipole[0]*atomI.labFrameDipole[0] + atomJ.inducedDipole[1]*atomI.labFrameDipole[1] + atomJ.inducedDipole[2]*atomI.labFrameDipole[2];
#endif
#if defined F1 || defined T3
float sci3 = atomI.inducedDipole[0]*xr + atomI.inducedDipole[1]*yr + atomI.inducedDipole[2]*zr;
#endif
#if defined F1
float sci7 = qIr_0*atomJ.inducedDipole[0] + qIr_1*atomJ.inducedDipole[1] + qIr_2*atomJ.inducedDipole[2];
float sci8 = qJr_0*atomI.inducedDipole[0] + qJr_1*atomI.inducedDipole[1] + qJr_2*atomI.inducedDipole[2];
#endif
#if defined F1 || defined T1
float sci4 = atomJ.inducedDipole[0]*xr + atomJ.inducedDipole[1]*yr + atomJ.inducedDipole[2]*zr;
#endif
#if defined F1
float scip1 = atomI.inducedDipoleP[0]*atomJ.labFrameDipole[0] + atomI.inducedDipoleP[1]*atomJ.labFrameDipole[1] + atomI.inducedDipoleP[2]*atomJ.labFrameDipole[2] +
atomJ.inducedDipoleP[0]*atomI.labFrameDipole[0] + atomJ.inducedDipoleP[1]*atomI.labFrameDipole[1] + atomJ.inducedDipoleP[2]*atomI.labFrameDipole[2];
float scip2 = atomI.inducedDipole[0]*atomJ.inducedDipoleP[0] + atomI.inducedDipole[1]*atomJ.inducedDipoleP[1] + atomI.inducedDipole[2]*atomJ.inducedDipoleP[2] +
atomJ.inducedDipole[0]*atomI.inducedDipoleP[0] + atomJ.inducedDipole[1]*atomI.inducedDipoleP[1] + atomJ.inducedDipole[2]*atomI.inducedDipoleP[2];
#endif
#if defined F1 || defined T3
float scip3 = ((atomI.inducedDipoleP[0])*(xr) + (atomI.inducedDipoleP[1])*(yr) + (atomI.inducedDipoleP[2])*(zr));
#endif
#if defined F1 || defined T1
float scip4 = ((atomJ.inducedDipoleP[0])*(xr) + (atomJ.inducedDipoleP[1])*(yr) + (atomJ.inducedDipoleP[2])*(zr));
#endif
#ifdef F1
float scip7 = ((qIr_0)*(atomJ.inducedDipoleP[0]) + (qIr_1)*(atomJ.inducedDipoleP[1]) + (qIr_2)*(atomJ.inducedDipoleP[2]));
float scip8 = ((qJr_0)*(atomI.inducedDipoleP[0]) + (qJr_1)*(atomI.inducedDipoleP[1]) + (qJr_2)*(atomI.inducedDipoleP[2]));
float gli1 = atomJ.q*sci3 - atomI.q*sci4;
float gli6 = sci1;
float glip1 = atomJ.q*scip3 - atomI.q*scip4;
float glip6 = scip1;
float gli2 = -sc3*sci4 - sci3*sc4;
float gli3 = sci3*sc6 - sci4*sc5;
float gli7 = 2.0f*(sci7-sci8);
float glip2 = -sc3*scip4 - scip3*sc4;
float glip3 = scip3*sc6 - scip4*sc5;
float glip7 = 2.0f*(scip7-scip8);
float factor3 = rr3*(( gli1 + gli6)*scalingFactors[PScaleIndex] + (glip1 + glip6)*scalingFactors[DScaleIndex]);
float factor5 = rr5*(( gli2 + gli7)*scalingFactors[PScaleIndex] + (glip2 + glip7)*scalingFactors[DScaleIndex]);
float factor7 = rr7*( gli3*scalingFactors[PScaleIndex] + glip3*scalingFactors[DScaleIndex]);
float ftm2i_0 = -0.5f*(factor3*ddsc3_0 + factor5*ddsc5_0 + factor7*ddsc7_0);
float ftm2i_1 = -0.5f*(factor3*ddsc3_1 + factor5*ddsc5_1 + factor7*ddsc7_1);
float ftm2i_2 = -0.5f*(factor3*ddsc3_2 + factor5*ddsc5_2 + factor7*ddsc7_2);
float gl0 = atomI.q*atomJ.q;
float gl1 = atomJ.q*sc3 - atomI.q*sc4;
float gl2 = atomI.q*sc6 + atomJ.q*sc5 - sc3*sc4;
float gl3 = sc3*sc6 - sc4*sc5;
float gl4 = sc5*sc6;
float gl6 = sc2;
float gl7 = 2.0f*(sc7-sc8);
float gl8 = 2.0f*sc10;
float gl5 = -4.0f*sc9;
float gf1 = rr3*gl0 + rr5*(gl1+gl6) + rr7*(gl2+gl7+gl8) + rr9*(gl3+gl5) + rr11*gl4;
#endif
#if defined F1 || defined T1
float gf2 = -atomJ.q*rr3 + sc4*rr5 - sc6*rr7;
float gf5 = 2.0f*(-atomJ.q*rr5+sc4*rr7-sc6*rr9);
#endif
#if defined F1 || defined T3
float gf3 = atomI.q*rr3 + sc3*rr5 + sc5*rr7;
float gf6 = 2.0f*(-atomI.q*rr5-sc3*rr7-sc5*rr9);
#endif
#ifdef F1
float em = scalingFactors[MScaleIndex]*(rr1*gl0 + rr3*(gl1+gl6) + rr5*(gl2+gl7+gl8) + rr7*(gl3+gl5) + rr9*gl4);
float ei = 0.5f*((gli1+gli6)*psc3 + (gli2+gli7)*psc5 + gli3*psc7);
*energy = em+ei;
#endif
#if defined F1 || defined T1
float qIdJ_0 = atomI.labFrameQuadrupole[0]*atomJ.labFrameDipole[0] + atomI.labFrameQuadrupole[3]*atomJ.labFrameDipole[1] + atomI.labFrameQuadrupole[6]*atomJ.labFrameDipole[2];
float qIdJ_1 = atomI.labFrameQuadrupole[1]*atomJ.labFrameDipole[0] + atomI.labFrameQuadrupole[4]*atomJ.labFrameDipole[1] + atomI.labFrameQuadrupole[7]*atomJ.labFrameDipole[2];
float qIdJ_2 = atomI.labFrameQuadrupole[2]*atomJ.labFrameDipole[0] + atomI.labFrameQuadrupole[5]*atomJ.labFrameDipole[1] + atomI.labFrameQuadrupole[8]*atomJ.labFrameDipole[2];
float qIqJr_0 = atomI.labFrameQuadrupole[0]*qJr_0 + atomI.labFrameQuadrupole[3]*qJr_1 + atomI.labFrameQuadrupole[6]*qJr_2;
float qIqJr_1 = atomI.labFrameQuadrupole[1]*qJr_0 + atomI.labFrameQuadrupole[4]*qJr_1 + atomI.labFrameQuadrupole[7]*qJr_2;
float qIqJr_2 = atomI.labFrameQuadrupole[2]*qJr_0 + atomI.labFrameQuadrupole[5]*qJr_1 + atomI.labFrameQuadrupole[8]*qJr_2;
#endif
#ifdef F1
float qkqir_0 = atomJ.labFrameQuadrupole[0]*qIr_0 + atomJ.labFrameQuadrupole[3]*qIr_1 + atomJ.labFrameQuadrupole[6]*qIr_2;
float qkqir_1 = atomJ.labFrameQuadrupole[1]*qIr_0 + atomJ.labFrameQuadrupole[4]*qIr_1 + atomJ.labFrameQuadrupole[7]*qIr_2;
float qkqir_2 = atomJ.labFrameQuadrupole[2]*qIr_0 + atomJ.labFrameQuadrupole[5]*qIr_1 + atomJ.labFrameQuadrupole[8]*qIr_2;
float qkdi_0 = atomJ.labFrameQuadrupole[0]*atomI.labFrameDipole[0] + atomJ.labFrameQuadrupole[3]*atomI.labFrameDipole[1] + atomJ.labFrameQuadrupole[6]*atomI.labFrameDipole[2];
float qkdi_1 = atomJ.labFrameQuadrupole[1]*atomI.labFrameDipole[0] + atomJ.labFrameQuadrupole[4]*atomI.labFrameDipole[1] + atomJ.labFrameQuadrupole[7]*atomI.labFrameDipole[2];
float qkdi_2 = atomJ.labFrameQuadrupole[2]*atomI.labFrameDipole[0] + atomJ.labFrameQuadrupole[5]*atomI.labFrameDipole[1] + atomJ.labFrameQuadrupole[8]*atomI.labFrameDipole[2];
float ftm2_0 = scalingFactors[MScaleIndex]*(gf1*xr + gf2*atomI.labFrameDipole[0] + gf3*atomJ.labFrameDipole[0] + 2.0f*rr5*(qkdi_0 - qIdJ_0) + gf5*qIr_0 + gf6*qJr_0 + 4.0f*rr7*(qIqJr_0 + qkqir_0));
float ftm2_1 = scalingFactors[MScaleIndex]*(gf1*yr + gf2*atomI.labFrameDipole[1] + gf3*atomJ.labFrameDipole[1] + 2.0f*rr5*(qkdi_1 - qIdJ_1) + gf5*qIr_1 + gf6*qJr_1 + 4.0f*rr7*(qIqJr_1 + qkqir_1));
float ftm2_2 = scalingFactors[MScaleIndex]*(gf1*zr + gf2*atomI.labFrameDipole[2] + gf3*atomJ.labFrameDipole[2] + 2.0f*rr5*(qkdi_2 - qIdJ_2) + gf5*qIr_2 + gf6*qJr_2 + 4.0f*rr7*(qIqJr_2 + qkqir_2));
float gfi1 = rr2*(1.5f*((gli1+gli6)*psc3 + (glip1+glip6)*dsc3 + scip2*scale3i) + 2.5f*((gli7+gli2)*psc5 + (glip7+glip2)*dsc5 - (sci3*scip4+scip3*sci4)*scale5i) + 3.5f*(gli3*psc7+glip3*dsc7));
ftm2i_0 += gfi1*xr;
ftm2i_1 += gfi1*yr;
ftm2i_2 += gfi1*zr;
#endif
#if defined F1 || defined T1
float gfi5 = (sci4*psc7 + scip4*dsc7);
#endif
#if defined F1 || defined T3
float gfi6 = -(sci3*psc7 + scip3*dsc7);
#endif
#if defined F1 || defined T1
float qIuJ_0 = atomI.labFrameQuadrupole[0]*atomJ.inducedDipole[0] + atomI.labFrameQuadrupole[3]*atomJ.inducedDipole[1] + atomI.labFrameQuadrupole[6]*atomJ.inducedDipole[2];
float qIuJ_1 = atomI.labFrameQuadrupole[1]*atomJ.inducedDipole[0] + atomI.labFrameQuadrupole[4]*atomJ.inducedDipole[1] + atomI.labFrameQuadrupole[7]*atomJ.inducedDipole[2];
float qIuJ_2 = atomI.labFrameQuadrupole[2]*atomJ.inducedDipole[0] + atomI.labFrameQuadrupole[5]*atomJ.inducedDipole[1] + atomI.labFrameQuadrupole[8]*atomJ.inducedDipole[2];
float qIuJp_0 = atomI.labFrameQuadrupole[0]*atomJ.inducedDipoleP[0] + atomI.labFrameQuadrupole[3]*atomJ.inducedDipoleP[1] + atomI.labFrameQuadrupole[6]*atomJ.inducedDipoleP[2];
float qIuJp_1 = atomI.labFrameQuadrupole[1]*atomJ.inducedDipoleP[0] + atomI.labFrameQuadrupole[4]*atomJ.inducedDipoleP[1] + atomI.labFrameQuadrupole[7]*atomJ.inducedDipoleP[2];
float qIuJp_2 = atomI.labFrameQuadrupole[2]*atomJ.inducedDipoleP[0] + atomI.labFrameQuadrupole[5]*atomJ.inducedDipoleP[1] + atomI.labFrameQuadrupole[8]*atomJ.inducedDipoleP[2];
#endif
#if defined T3
float qJuIp_0 = atomJ.labFrameQuadrupole[0]*atomI.inducedDipoleP[0] + atomJ.labFrameQuadrupole[3]*atomI.inducedDipoleP[1] + atomJ.labFrameQuadrupole[6]*atomI.inducedDipoleP[2];
float qJuIp_1 = atomJ.labFrameQuadrupole[1]*atomI.inducedDipoleP[0] + atomJ.labFrameQuadrupole[4]*atomI.inducedDipoleP[1] + atomJ.labFrameQuadrupole[7]*atomI.inducedDipoleP[2];
float qJuIp_2 = atomJ.labFrameQuadrupole[2]*atomI.inducedDipoleP[0] + atomJ.labFrameQuadrupole[5]*atomI.inducedDipoleP[1] + atomJ.labFrameQuadrupole[8]*atomI.inducedDipoleP[2];
float qJuI_0 = atomJ.labFrameQuadrupole[0]*atomI.inducedDipole[0] + atomJ.labFrameQuadrupole[3]*atomI.inducedDipole[1] + atomJ.labFrameQuadrupole[6]*atomI.inducedDipole[2];
float qJuI_1 = atomJ.labFrameQuadrupole[1]*atomI.inducedDipole[0] + atomJ.labFrameQuadrupole[4]*atomI.inducedDipole[1] + atomJ.labFrameQuadrupole[7]*atomI.inducedDipole[2];
float qJuI_2 = atomJ.labFrameQuadrupole[2]*atomI.inducedDipole[0] + atomJ.labFrameQuadrupole[5]*atomI.inducedDipole[1] + atomJ.labFrameQuadrupole[8]*atomI.inducedDipole[2];
#endif
#ifdef F1
float qkui_0 = atomJ.labFrameQuadrupole[0]*atomI.inducedDipole[0] + atomJ.labFrameQuadrupole[3]*atomI.inducedDipole[1] + atomJ.labFrameQuadrupole[6]*atomI.inducedDipole[2];
float qkui_1 = atomJ.labFrameQuadrupole[1]*atomI.inducedDipole[0] + atomJ.labFrameQuadrupole[4]*atomI.inducedDipole[1] + atomJ.labFrameQuadrupole[7]*atomI.inducedDipole[2];
float qkui_2 = atomJ.labFrameQuadrupole[2]*atomI.inducedDipole[0] + atomJ.labFrameQuadrupole[5]*atomI.inducedDipole[1] + atomJ.labFrameQuadrupole[8]*atomI.inducedDipole[2];
float qkuip_0 = atomJ.labFrameQuadrupole[0]*atomI.inducedDipoleP[0] + atomJ.labFrameQuadrupole[3]*atomI.inducedDipoleP[1] + atomJ.labFrameQuadrupole[6]*atomI.inducedDipoleP[2];
float qkuip_1 = atomJ.labFrameQuadrupole[1]*atomI.inducedDipoleP[0] + atomJ.labFrameQuadrupole[4]*atomI.inducedDipoleP[1] + atomJ.labFrameQuadrupole[7]*atomI.inducedDipoleP[2];
float qkuip_2 = atomJ.labFrameQuadrupole[2]*atomI.inducedDipoleP[0] + atomJ.labFrameQuadrupole[5]*atomI.inducedDipoleP[1] + atomJ.labFrameQuadrupole[8]*atomI.inducedDipoleP[2];
ftm2i_0 += 0.5f*(-atomJ.q*(atomI.inducedDipole[0]*psc3 + atomI.inducedDipoleP[0]*dsc3) +
sc4*(atomI.inducedDipole[0]*psc5 + atomI.inducedDipoleP[0]*dsc5) -
sc6*(atomI.inducedDipole[0]*psc7 + atomI.inducedDipoleP[0]*dsc7)) +
0.5f*(atomI.q*(atomJ.inducedDipole[0]*psc3+atomJ.inducedDipoleP[0]*dsc3) +
sc3*(atomJ.inducedDipole[0]*psc5 +atomJ.inducedDipoleP[0]*dsc5) +
sc5*(atomJ.inducedDipole[0]*psc7 +atomJ.inducedDipoleP[0]*dsc7)) +
scale5i*(sci4*atomI.inducedDipoleP[0]+scip4*atomI.inducedDipole[0] +
sci3*atomJ.inducedDipoleP[0]+scip3*atomJ.inducedDipole[0])*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atomI.labFrameDipole[0] +
0.5f*(sci3*psc5+scip3*dsc5)*atomJ.labFrameDipole[0] +
((qkui_0-qIuJ_0)*psc5 + (qkuip_0-qIuJp_0)*dsc5) +
gfi5*qIr_0 + gfi6*qJr_0;
ftm2i_1 += 0.5f*(-atomJ.q*(atomI.inducedDipole[1]*psc3 + atomI.inducedDipoleP[1]*dsc3) +
sc4*(atomI.inducedDipole[1]*psc5 + atomI.inducedDipoleP[1]*dsc5) -
sc6*(atomI.inducedDipole[1]*psc7 + atomI.inducedDipoleP[1]*dsc7)) +
(atomI.q*(atomJ.inducedDipole[1]*psc3+atomJ.inducedDipoleP[1]*dsc3) +
sc3*(atomJ.inducedDipole[1]*psc5 +atomJ.inducedDipoleP[1]*dsc5) +
sc5*(atomJ.inducedDipole[1]*psc7 +atomJ.inducedDipoleP[1]*dsc7))*0.5f +
scale5i*(sci4*atomI.inducedDipoleP[1]+scip4*atomI.inducedDipole[1] + sci3*atomJ.inducedDipoleP[1]+scip3*atomJ.inducedDipole[1])*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atomI.labFrameDipole[1] +
0.5f*(sci3*psc5+scip3*dsc5)*atomJ.labFrameDipole[1] +
((qkui_1-qIuJ_1)*psc5 + (qkuip_1-qIuJp_1)*dsc5) +
gfi5*qIr_1 + gfi6*qJr_1;
ftm2i_2 += 0.5f*(-atomJ.q*(atomI.inducedDipole[2]*psc3 + atomI.inducedDipoleP[2]*dsc3) +
sc4*(atomI.inducedDipole[2]*psc5 + atomI.inducedDipoleP[2]*dsc5) -
sc6*(atomI.inducedDipole[2]*psc7 + atomI.inducedDipoleP[2]*dsc7)) +
(atomI.q*(atomJ.inducedDipole[2]*psc3+atomJ.inducedDipoleP[2]*dsc3) +
sc3*(atomJ.inducedDipole[2]*psc5 +atomJ.inducedDipoleP[2]*dsc5) +
sc5*(atomJ.inducedDipole[2]*psc7 +atomJ.inducedDipoleP[2]*dsc7))*0.5f +
scale5i*(sci4*atomI.inducedDipoleP[2]+scip4*atomI.inducedDipole[2] +
sci3*atomJ.inducedDipoleP[2]+scip3*atomJ.inducedDipole[2])*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atomI.labFrameDipole[2] +
0.5f*(sci3*psc5+scip3*dsc5)*atomJ.labFrameDipole[2] +
((qkui_2-qIuJ_2)*psc5 + (qkuip_2-qIuJp_2)*dsc5) +
gfi5*qIr_2 + gfi6*qJr_2;
if( cAmoebaSim.polarizationType )
{
float gfd = 0.5*(3.0*rr2*scip2*scale3i - 5.0f*rr2*(scip3*sci4+sci3*scip4)*scale5i);
float temp5 = 0.5*scale5i;
float fdir_0 = gfd*xr + temp5*(sci4*atomI.inducedDipoleP[0] + scip4*atomI.inducedDipole[0] + sci3*atomJ.inducedDipoleP[0] + scip3*atomJ.inducedDipole[0]);
float fdir_1 = gfd*yr + temp5*(sci4*atomI.inducedDipoleP[1] + scip4*atomI.inducedDipole[1] + sci3*atomJ.inducedDipoleP[1] + scip3*atomJ.inducedDipole[1]);
float fdir_2 = gfd*zr + temp5*(sci4*atomI.inducedDipoleP[2] + scip4*atomI.inducedDipole[2] + sci3*atomJ.inducedDipoleP[2] + scip3*atomJ.inducedDipole[2]);
ftm2i_0 -= fdir_0;
ftm2i_1 -= fdir_1;
ftm2i_2 -= fdir_2;
} else {
float scaleF = 0.5f*scalingFactors[UScaleIndex];
float inducedFactor3 = scip2*rr3*scaleF;
float inducedFactor5 = (sci3*scip4+scip3*sci4)*rr5*scaleF;
float findmp_0 = inducedFactor3*ddsc3_0 - inducedFactor5*ddsc5_0;
float findmp_1 = inducedFactor3*ddsc3_1 - inducedFactor5*ddsc5_1;
float findmp_2 = inducedFactor3*ddsc3_2 - inducedFactor5*ddsc5_2;
ftm2i_0 -= findmp_0;
ftm2i_1 -= findmp_1;
ftm2i_2 -= findmp_2;
}
#endif
#if defined T1
float gti2 = 0.5f*(sci4*psc5+scip4*dsc5);
float gti5 = gfi5;
#endif
#if defined T3
float gti3 = 0.5f*(sci3*psc5+scip3*dsc5);
float gti6 = gfi6;
#endif
#if defined T1 || defined T3
float dixdk_0 = atomI.labFrameDipole[1]*atomJ.labFrameDipole[2] - atomI.labFrameDipole[2]*atomJ.labFrameDipole[1];
float dixdk_1 = atomI.labFrameDipole[2]*atomJ.labFrameDipole[0] - atomI.labFrameDipole[0]*atomJ.labFrameDipole[2];
float dixdk_2 = atomI.labFrameDipole[0]*atomJ.labFrameDipole[1] - atomI.labFrameDipole[1]*atomJ.labFrameDipole[0];
#if defined T1
float dixuk_0 = atomI.labFrameDipole[1]*atomJ.inducedDipole[2] - atomI.labFrameDipole[2]*atomJ.inducedDipole[1];
float dixuk_1 = atomI.labFrameDipole[2]*atomJ.inducedDipole[0] - atomI.labFrameDipole[0]*atomJ.inducedDipole[2];
float dixuk_2 = atomI.labFrameDipole[0]*atomJ.inducedDipole[1] - atomI.labFrameDipole[1]*atomJ.inducedDipole[0];
#endif
#endif
#ifdef T1
float dixukp_0 = atomI.labFrameDipole[1]*atomJ.inducedDipoleP[2] - atomI.labFrameDipole[2]*atomJ.inducedDipoleP[1];
float dixukp_1 = atomI.labFrameDipole[2]*atomJ.inducedDipoleP[0] - atomI.labFrameDipole[0]*atomJ.inducedDipoleP[2];
float dixukp_2 = atomI.labFrameDipole[0]*atomJ.inducedDipoleP[1] - atomI.labFrameDipole[1]*atomJ.inducedDipoleP[0];
#endif
#ifdef T1
float dixr_0 = atomI.labFrameDipole[1]*zr - atomI.labFrameDipole[2]*yr;
float dixr_1 = atomI.labFrameDipole[2]*xr - atomI.labFrameDipole[0]*zr;
float dixr_2 = atomI.labFrameDipole[0]*yr - atomI.labFrameDipole[1]*xr;
#endif
#ifdef T1
float rxqiukp_0 = yr*qIuJp_2 - zr*qIuJp_1;
float rxqiukp_1 = zr*qIuJp_0 - xr*qIuJp_2;
float rxqiukp_2 = xr*qIuJp_1 - yr*qIuJp_0;
float rxqir_0 = yr*qIr_2 - zr*qIr_1;
float rxqir_1 = zr*qIr_0 - xr*qIr_2;
float rxqir_2 = xr*qIr_1 - yr*qIr_0;
float rxqiuk_0 = yr*qIuJ_2 - zr*qIuJ_1;
float rxqiuk_1 = zr*qIuJ_0 - xr*qIuJ_2;
float rxqiuk_2 = xr*qIuJ_1 - yr*qIuJ_0;
float ukxqir_0 = atomJ.inducedDipole[1]*qIr_2 - atomJ.inducedDipole[2]*qIr_1;
float ukxqir_1 = atomJ.inducedDipole[2]*qIr_0 - atomJ.inducedDipole[0]*qIr_2;
float ukxqir_2 = atomJ.inducedDipole[0]*qIr_1 - atomJ.inducedDipole[1]*qIr_0;
float ukxqirp_0 = atomJ.inducedDipoleP[1]*qIr_2 - atomJ.inducedDipoleP[2]*qIr_1;
float ukxqirp_1 = atomJ.inducedDipoleP[2]*qIr_0 - atomJ.inducedDipoleP[0]*qIr_2;
float ukxqirp_2 = atomJ.inducedDipoleP[0]*qIr_1 - atomJ.inducedDipoleP[1]*qIr_0;
float dixqkr_0 = atomI.labFrameDipole[1]*qJr_2 - atomI.labFrameDipole[2]*qJr_1;
float dixqkr_1 = atomI.labFrameDipole[2]*qJr_0 - atomI.labFrameDipole[0]*qJr_2;
float dixqkr_2 = atomI.labFrameDipole[0]*qJr_1 - atomI.labFrameDipole[1]*qJr_0;
float dkxqir_0 = atomJ.labFrameDipole[1]*qIr_2 - atomJ.labFrameDipole[2]*qIr_1;
float dkxqir_1 = atomJ.labFrameDipole[2]*qIr_0 - atomJ.labFrameDipole[0]*qIr_2;
float dkxqir_2 = atomJ.labFrameDipole[0]*qIr_1 - atomJ.labFrameDipole[1]*qIr_0;
float rxqikr_0 = yr*qIqJr_2 - zr*qIqJr_1;
float rxqikr_1 = zr*qIqJr_0 - xr*qIqJr_2;
float rxqikr_2 = xr*qIqJr_1 - yr*qIqJr_0;
float rxqidk_0 = yr*qIdJ_2 - zr*qIdJ_1;
float rxqidk_1 = zr*qIdJ_0 - xr*qIdJ_2;
float rxqidk_2 = xr*qIdJ_1 - yr*qIdJ_0;
float qkrxqir_0 = qJr_1*qIr_2 - qJr_2*qIr_1;
float qkrxqir_1 = qJr_2*qIr_0 - qJr_0*qIr_2;
float qkrxqir_2 = qJr_0*qIr_1 - qJr_1*qIr_0;
#endif
#if defined T1 || defined T3
float qixqk_0 = atomI.labFrameQuadrupole[1]*atomJ.labFrameQuadrupole[2] + atomI.labFrameQuadrupole[4]*atomJ.labFrameQuadrupole[5] + atomI.labFrameQuadrupole[7]*atomJ.labFrameQuadrupole[8] -
atomI.labFrameQuadrupole[2]*atomJ.labFrameQuadrupole[1] - atomI.labFrameQuadrupole[5]*atomJ.labFrameQuadrupole[4] - atomI.labFrameQuadrupole[8]*atomJ.labFrameQuadrupole[7];
float qixqk_1 = atomI.labFrameQuadrupole[2]*atomJ.labFrameQuadrupole[0] + atomI.labFrameQuadrupole[5]*atomJ.labFrameQuadrupole[3] + atomI.labFrameQuadrupole[8]*atomJ.labFrameQuadrupole[6] -
atomI.labFrameQuadrupole[0]*atomJ.labFrameQuadrupole[2] - atomI.labFrameQuadrupole[3]*atomJ.labFrameQuadrupole[5] - atomI.labFrameQuadrupole[6]*atomJ.labFrameQuadrupole[8];
float qixqk_2 = atomI.labFrameQuadrupole[0]*atomJ.labFrameQuadrupole[1] + atomI.labFrameQuadrupole[3]*atomJ.labFrameQuadrupole[4] + atomI.labFrameQuadrupole[6]*atomJ.labFrameQuadrupole[7] -
atomI.labFrameQuadrupole[1]*atomJ.labFrameQuadrupole[0] - atomI.labFrameQuadrupole[4]*atomJ.labFrameQuadrupole[3] - atomI.labFrameQuadrupole[7]*atomJ.labFrameQuadrupole[6];
#endif
#ifdef T1
float ttm2_0 = -rr3*dixdk_0 + gf2*dixr_0-gf5*rxqir_0 + 2.0f*rr5*(dixqkr_0 + dkxqir_0 + rxqidk_0-2.0f*qixqk_0) - 4.0f*rr7*(rxqikr_0 + qkrxqir_0);
float ttm2_1 = -rr3*dixdk_1 + gf2*dixr_1-gf5*rxqir_1 + 2.0f*rr5*(dixqkr_1 + dkxqir_1 + rxqidk_1-2.0f*qixqk_1) - 4.0f*rr7*(rxqikr_1 + qkrxqir_1);
float ttm2_2 = -rr3*dixdk_2 + gf2*dixr_2-gf5*rxqir_2 + 2.0f*rr5*(dixqkr_2 + dkxqir_2 + rxqidk_2-2.0f*qixqk_2) - 4.0f*rr7*(rxqikr_2 + qkrxqir_2);
float ttm2i_0 = -(dixuk_0*psc3+dixukp_0*dsc3)*0.5f + gti2*dixr_0 + ((ukxqir_0+ rxqiuk_0)*psc5 + (ukxqirp_0 + rxqiukp_0)*dsc5) - gti5*rxqir_0;
float ttm2i_1 = -(dixuk_1*psc3+dixukp_1*dsc3)*0.5f + gti2*dixr_1 + ((ukxqir_1+ rxqiuk_1)*psc5 + (ukxqirp_1 + rxqiukp_1)*dsc5) - gti5*rxqir_1;
float ttm2i_2 = -(dixuk_2*psc3+dixukp_2*dsc3)*0.5f + gti2*dixr_2 + ((ukxqir_2+ rxqiuk_2)*psc5 + (ukxqirp_2 + rxqiukp_2)*dsc5) - gti5*rxqir_2;
#endif
#ifdef T3
float qJqIr_0 = atomJ.labFrameQuadrupole[0]*qIr_0 + atomJ.labFrameQuadrupole[3]*qIr_1 + atomJ.labFrameQuadrupole[6]*qIr_2;
float qJqIr_1 = atomJ.labFrameQuadrupole[1]*qIr_0 + atomJ.labFrameQuadrupole[4]*qIr_1 + atomJ.labFrameQuadrupole[7]*qIr_2;
float qJqIr_2 = atomJ.labFrameQuadrupole[2]*qIr_0 + atomJ.labFrameQuadrupole[5]*qIr_1 + atomJ.labFrameQuadrupole[8]*qIr_2;
float qJdI_0 = atomJ.labFrameQuadrupole[0]*atomI.labFrameDipole[0] + atomJ.labFrameQuadrupole[3]*atomI.labFrameDipole[1] + atomJ.labFrameQuadrupole[6]*atomI.labFrameDipole[2];
float qJdI_1 = atomJ.labFrameQuadrupole[1]*atomI.labFrameDipole[0] + atomJ.labFrameQuadrupole[4]*atomI.labFrameDipole[1] + atomJ.labFrameQuadrupole[7]*atomI.labFrameDipole[2];
float qJdI_2 = atomJ.labFrameQuadrupole[2]*atomI.labFrameDipole[0] + atomJ.labFrameQuadrupole[5]*atomI.labFrameDipole[1] + atomJ.labFrameQuadrupole[8]*atomI.labFrameDipole[2];
float dkxr_0 = atomJ.labFrameDipole[1]*zr - atomJ.labFrameDipole[2]*yr;
float dkxr_1 = atomJ.labFrameDipole[2]*xr - atomJ.labFrameDipole[0]*zr;
float dkxr_2 = atomJ.labFrameDipole[0]*yr - atomJ.labFrameDipole[1]*xr;
float rxqkr_0 = yr*qJr_2 - zr*qJr_1;
float rxqkr_1 = zr*qJr_0 - xr*qJr_2;
float rxqkr_2 = xr*qJr_1 - yr*qJr_0;
float dixqkr_0 = atomI.labFrameDipole[1]*qJr_2 - atomI.labFrameDipole[2]*qJr_1;
float dixqkr_1 = atomI.labFrameDipole[2]*qJr_0 - atomI.labFrameDipole[0]*qJr_2;
float dixqkr_2 = atomI.labFrameDipole[0]*qJr_1 - atomI.labFrameDipole[1]*qJr_0;
float dkxqir_0 = atomJ.labFrameDipole[1]*qIr_2 - atomJ.labFrameDipole[2]*qIr_1;
float dkxqir_1 = atomJ.labFrameDipole[2]*qIr_0 - atomJ.labFrameDipole[0]*qIr_2;
float dkxqir_2 = atomJ.labFrameDipole[0]*qIr_1 - atomJ.labFrameDipole[1]*qIr_0;
float rxqkdi_0 = yr*qJdI_2 - zr*qJdI_1;
float rxqkdi_1 = zr*qJdI_0 - xr*qJdI_2;
float rxqkdi_2 = xr*qJdI_1 - yr*qJdI_0;
float rxqkir_0 = yr*qJqIr_2 - zr*qJqIr_1;
float rxqkir_1 = zr*qJqIr_0 - xr*qJqIr_2;
float rxqkir_2 = xr*qJqIr_1 - yr*qJqIr_0;
float qkrxqir_0 = qJr_1*qIr_2 - qJr_2*qIr_1;
float qkrxqir_1 = qJr_2*qIr_0 - qJr_0*qIr_2;
float qkrxqir_2 = qJr_0*qIr_1 - qJr_1*qIr_0;
float dkxui_0 = atomJ.labFrameDipole[1]*atomI.inducedDipole[2] - atomJ.labFrameDipole[2]*atomI.inducedDipole[1];
float dkxui_1 = atomJ.labFrameDipole[2]*atomI.inducedDipole[0] - atomJ.labFrameDipole[0]*atomI.inducedDipole[2];
float dkxui_2 = atomJ.labFrameDipole[0]*atomI.inducedDipole[1] - atomJ.labFrameDipole[1]*atomI.inducedDipole[0];
float dkxuip_0 = atomJ.labFrameDipole[1]*atomI.inducedDipoleP[2] - atomJ.labFrameDipole[2]*atomI.inducedDipoleP[1];
float dkxuip_1 = atomJ.labFrameDipole[2]*atomI.inducedDipoleP[0] - atomJ.labFrameDipole[0]*atomI.inducedDipoleP[2];
float dkxuip_2 = atomJ.labFrameDipole[0]*atomI.inducedDipoleP[1] - atomJ.labFrameDipole[1]*atomI.inducedDipoleP[0];
float uixqkrp_0 = atomI.inducedDipoleP[1]*qJr_2 - atomI.inducedDipoleP[2]*qJr_1;
float uixqkrp_1 = atomI.inducedDipoleP[2]*qJr_0 - atomI.inducedDipoleP[0]*qJr_2;
float uixqkrp_2 = atomI.inducedDipoleP[0]*qJr_1 - atomI.inducedDipoleP[1]*qJr_0;
float uixqkr_0 = atomI.inducedDipole[1]*qJr_2 - atomI.inducedDipole[2]*qJr_1;
float uixqkr_1 = atomI.inducedDipole[2]*qJr_0 - atomI.inducedDipole[0]*qJr_2;
float uixqkr_2 = atomI.inducedDipole[0]*qJr_1 - atomI.inducedDipole[1]*qJr_0;
float rxqkuip_0 = yr*qJuIp_2 - zr*qJuIp_1;
float rxqkuip_1 = zr*qJuIp_0 - xr*qJuIp_2;
float rxqkuip_2 = xr*qJuIp_1 - yr*qJuIp_0;
float rxqkui_0 = yr*qJuI_2 - zr*qJuI_1;
float rxqkui_1 = zr*qJuI_0 - xr*qJuI_2;
float rxqkui_2 = xr*qJuI_1 - yr*qJuI_0;
float ttm3_0 = rr3*dixdk_0 + gf3*dkxr_0 - gf6*rxqkr_0 - 2.0f*rr5*(dixqkr_0 + dkxqir_0 + rxqkdi_0 - 2.0f*qixqk_0) - 4.0f*rr7*(rxqkir_0 - qkrxqir_0);
float ttm3_1 = rr3*dixdk_1 + gf3*dkxr_1 - gf6*rxqkr_1 - 2.0f*rr5*(dixqkr_1 + dkxqir_1 + rxqkdi_1 - 2.0f*qixqk_1) - 4.0f*rr7*(rxqkir_1 - qkrxqir_1);
float ttm3_2 = rr3*dixdk_2 + gf3*dkxr_2 - gf6*rxqkr_2 - 2.0f*rr5*(dixqkr_2 + dkxqir_2 + rxqkdi_2 - 2.0f*qixqk_2) - 4.0f*rr7*(rxqkir_2 - qkrxqir_2);
float ttm3i_0 = -(dkxui_0*psc3+ dkxuip_0*dsc3)*0.5f + gti3*dkxr_0 - ((uixqkr_0 + rxqkui_0)*psc5 + (uixqkrp_0 + rxqkuip_0)*dsc5) - gti6*rxqkr_0;
float ttm3i_1 = -(dkxui_1*psc3+ dkxuip_1*dsc3)*0.5f + gti3*dkxr_1 - ((uixqkr_1 + rxqkui_1)*psc5 + (uixqkrp_1 + rxqkuip_1)*dsc5) - gti6*rxqkr_1;
float ttm3i_2 = -(dkxui_2*psc3+ dkxuip_2*dsc3)*0.5f + gti3*dkxr_2 - ((uixqkr_2 + rxqkui_2)*psc5 + (uixqkrp_2 + rxqkuip_2)*dsc5) - gti6*rxqkr_2;
#endif
if( scalingFactors[MScaleIndex] < 1.0f ){
#ifdef T1
ttm2_0 *= scalingFactors[MScaleIndex];
ttm2_1 *= scalingFactors[MScaleIndex];
ttm2_2 *= scalingFactors[MScaleIndex];
#endif
#ifdef T3
ttm3_0 *= scalingFactors[MScaleIndex];
ttm3_1 *= scalingFactors[MScaleIndex];
ttm3_2 *= scalingFactors[MScaleIndex];
#endif
}
#ifdef F1
outputForce[0] = -(ftm2_0+ftm2i_0);
outputForce[1] = -(ftm2_1+ftm2i_1);
outputForce[2] = -(ftm2_2+ftm2i_2);
#endif
#ifdef T1
outputForce[0] = (ttm2_0 + ttm2i_0);
outputForce[1] = (ttm2_1 + ttm2i_1);
outputForce[2] = (ttm2_2 + ttm2i_2);
#endif
#ifdef T3
outputForce[0] = (ttm3_0 + ttm3i_0);
outputForce[1] = (ttm3_1 + ttm3i_1);
outputForce[2] = (ttm3_2 + ttm3i_2);
#endif
return;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
/**
 * Copy the host-side simulation parameter structs into this translation unit's
 * __constant__ symbols (cSim and cAmoebaSim).
 *
 * @param amoebaGpu  Amoeba GPU context; amoebaGpu->gpuContext->sim is copied to cSim
 *                   and amoebaGpu->amoebaSim is copied to cAmoebaSim
 *
 * Each copy status is handed to RTERROR together with a message naming this function.
 * NOTE(review): RTERROR is defined elsewhere; presumably it reports and aborts on a
 * non-success status -- confirm against its definition.
 */
void SetCalculateAmoebaCudaFixedEAndGKFieldsSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;

    // generic simulation parameters
    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaCudaFixedEAndGKFieldsSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");

    // Amoeba-specific simulation parameters
    status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaCudaFixedEAndGKFieldsSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
/**
 * Copy this translation unit's __constant__ symbols (cSim and cAmoebaSim) back
 * into the host-side simulation parameter structs.
 *
 * @param amoebaGpu  Amoeba GPU context; amoebaGpu->gpuContext->sim is overwritten from cSim
 *                   and amoebaGpu->amoebaSim is overwritten from cAmoebaSim
 *
 * Each copy status is handed to RTERROR together with a message naming this function.
 * NOTE(review): RTERROR is defined elsewhere; presumably it reports and aborts on a
 * non-success status -- confirm against its definition.
 */
void GetCalculateAmoebaCudaFixedEAndGKFieldSim(amoebaGpuContext amoebaGpu)
{
    cudaError_t status;
    gpuContext gpu = amoebaGpu->gpuContext;

    // generic simulation parameters
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaCudaFixedEAndGKFieldSim: cudaMemcpyFromSymbol: copy from cSim failed");

    // Amoeba-specific simulation parameters
    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaCudaFixedEAndGKFieldSim: cudaMemcpyFromSymbol: copy from cAmoebaSim failed");
}
// Run kReduceFields_kernel once per work array, writing into the matching field array:
//   psWorkArray_3_1 -> psE_Field
//   psWorkArray_3_2 -> psE_FieldPolar
//   psWorkArray_3_3 -> psGk_Field
// All three launches use the same grid (sim.nonbond_blocks), block size
// (sim.bsf_reduce_threads_per_block) and leading arguments
// (sim.paddedNumberOfAtoms*3, sim.outputBuffers); each launch is followed by LAUNCHERROR.
// NOTE(review): kReduceFields_kernel is defined elsewhere; presumably it sums the
// sim.outputBuffers partial copies of each of the paddedNumberOfAtoms*3 values, and the
// trailing 0 presumably selects overwrite rather than accumulate -- confirm against the kernel.
static void kReduceEAndGkFields(amoebaGpuContext amoebaGpu )
{
gpuContext gpu = amoebaGpu->gpuContext;
// work array 1 -> E field
kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psE_Field->_pDevData, 0 );
LAUNCHERROR("kReduceEAndGK_Fields1");
// work array 2 -> polar E field
kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_2->_pDevData, amoebaGpu->psE_FieldPolar->_pDevData, 0 );
LAUNCHERROR("kReduceEAndGK_Fields2");
// work array 3 -> GK field
kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_3->_pDevData, amoebaGpu->psGk_Field->_pDevData, 0 );
LAUNCHERROR("kReduceEAndGK_Fields3");
}
// file includes FixedFieldParticle struct definition/load/unload struct and kernel body for fixed E-field
#define GK
#include "kCalculateAmoebaCudaFixedFieldParticle.h"
#undef GK
/**
 * Generalized Kirkwood (GK) permanent reaction field contribution for one atom pair.
 *
 * Builds the GK auxiliary terms from the separation vector (J - I), rb2 and the
 * constants cAmoebaSim.gkc/fc/fd/fq, expands them into the dipole and quadrupole
 * tensors, and contracts those with each atom's charge, dipole and quadrupole.
 *
 * @param atomCoordinatesI      x,y,z position of atom I; .w is read as its charge
 * @param atomCoordinatesJ      x,y,z position of atom J; .w is read as its charge
 * @param labFrameDipoleI       dipole of atom I; elements [0..2] are read
 * @param labFrameDipoleJ       dipole of atom J; elements [0..2] are read
 * @param labFrameQuadrupoleI   quadrupole of atom I; elements [0],[1],[2],[4],[5],[8]
 *                              are read as xx,xy,xz,yy,yz,zz
 * @param labFrameQuadrupoleJ   quadrupole of atom J; same elements as for atom I
 * @param rb2                   scale used in the exponential term and in the GK denominator
 *                              (presumably the product of the two Born radii -- confirm
 *                              against the caller)
 * @param outputField           [0][0..2] receives the field built from atom J's multipoles,
 *                              [1][0..2] the field built from atom I's multipoles
 *                              (presumably the fields acting on I and on J respectively --
 *                              confirm against the caller)
 */
__device__ void calculateFixedGkFieldPairIxn_kernel( float4 atomCoordinatesI, float4 atomCoordinatesJ,
                                                     float* labFrameDipoleI, float* labFrameDipoleJ,
                                                     float* labFrameQuadrupoleI, float* labFrameQuadrupoleJ,
                                                     float rb2,
                                                     float outputField[2][3]
 ){

    // GK constants held in constant memory

    const float gkc  = cAmoebaSim.gkc;
    const float fc   = cAmoebaSim.fc;
    const float fd   = cAmoebaSim.fd;
    const float fq   = cAmoebaSim.fq;

    // multipoles of atom I

    const float ci   = atomCoordinatesI.w;

    const float uxi  = labFrameDipoleI[0];
    const float uyi  = labFrameDipoleI[1];
    const float uzi  = labFrameDipoleI[2];

    const float qxxi = labFrameQuadrupoleI[0];
    const float qxyi = labFrameQuadrupoleI[1];
    const float qxzi = labFrameQuadrupoleI[2];
    const float qyyi = labFrameQuadrupoleI[4];
    const float qyzi = labFrameQuadrupoleI[5];
    const float qzzi = labFrameQuadrupoleI[8];

    // multipoles of atom J

    const float ck   = atomCoordinatesJ.w;

    const float uxk  = labFrameDipoleJ[0];
    const float uyk  = labFrameDipoleJ[1];
    const float uzk  = labFrameDipoleJ[2];

    const float qxxk = labFrameQuadrupoleJ[0];
    const float qxyk = labFrameQuadrupoleJ[1];
    const float qxzk = labFrameQuadrupoleJ[2];
    const float qyyk = labFrameQuadrupoleJ[4];
    const float qyzk = labFrameQuadrupoleJ[5];
    const float qzzk = labFrameQuadrupoleJ[8];

    // separation vector (J - I) and its squares

    const float xr   = atomCoordinatesJ.x - atomCoordinatesI.x;
    const float yr   = atomCoordinatesJ.y - atomCoordinatesI.y;
    const float zr   = atomCoordinatesJ.z - atomCoordinatesI.z;

    const float xr2  = xr*xr;
    const float yr2  = yr*yr;
    const float zr2  = zr*zr;
    const float r2   = xr2 + yr2 + zr2;

    // exponential term and powers of the GK denominator

    const float expterm = expf(-r2/(gkc*rb2));
    const float expc    = expterm / gkc;
    const float dexpc   = -2.0f / (gkc*rb2);
    const float gf2     = 1.0f / (r2+rb2*expterm);
    const float gf      = sqrtf(gf2);
    const float gf3     = gf2 * gf;
    const float gf5     = gf3 * gf2;
    const float gf7     = gf5 * gf2;

    // reaction potential auxiliary terms, before the dielectric factors are applied

    const float u10 = -gf3;
    const float u20 = 3.0f * gf5;
    const float u30 = -15.0f * gf7;

    // reaction potential gradient auxiliary terms (still unscaled)

    const float expc1 = 1.0f - expc;
    const float u01   = expc1 * u10;
    const float u11   = expc1 * u20;
    const float u21   = expc1 * u30;

    // dipole second reaction potential gradient auxiliary term (built from unscaled terms)

    const float expcdexpc = -expc * dexpc;
    const float u12       = expc1*u21 + expcdexpc*u20;

    // multiply the auxiliary terms by the dielectric functions

    const float a01 = fc * u01;
    const float a10 = fd * u10;
    const float a11 = fd * u11;
    const float a12 = fd * u12;
    const float a20 = fq * u20;
    const float a21 = fq * u21;

    // tensor storage; only the indices assigned below are ever read

    float gc[5];
    float gux[11], guy[11], guz[11];
    float gqxx[5], gqxy[5], gqxz[5];
    float gqyy[5], gqyz[5], gqzz[5];

    // unweighted dipole reaction potential tensor

    gux[1] = xr * a10;
    guy[1] = yr * a10;
    guz[1] = zr * a10;

    // unweighted reaction potential gradient tensor: charge part

    gc[2] = xr * a01;
    gc[3] = yr * a01;
    gc[4] = zr * a01;

    // ... dipole part

    gux[2] = a10 + xr2*a11;
    gux[3] = xr * yr * a11;
    gux[4] = xr * zr * a11;

    guy[2] = gux[3];
    guy[3] = a10 + yr2*a11;
    guy[4] = yr * zr * a11;

    guz[2] = gux[4];
    guz[3] = guy[4];
    guz[4] = a10 + zr2*a11;

    // ... quadrupole part

    gqxx[2] = xr * (2.0f*a20+xr2*a21);
    gqxx[3] = yr * xr2*a21;
    gqxx[4] = zr * xr2*a21;

    gqyy[2] = xr * yr2*a21;
    gqyy[3] = yr * (2.0f*a20+yr2*a21);
    gqyy[4] = zr * yr2 * a21;

    gqzz[2] = xr * zr2 * a21;
    gqzz[3] = yr * zr2 * a21;
    gqzz[4] = zr * (2.0f*a20+zr2*a21);

    gqxy[2] = yr * (a20+xr2*a21);
    gqxy[3] = xr * (a20+yr2*a21);
    gqxy[4] = zr * xr * yr * a21;

    gqxz[2] = zr * (a20+xr2*a21);
    gqxz[3] = gqxy[4];
    gqxz[4] = xr * (a20+zr2*a21);

    gqyz[2] = gqxy[4];
    gqyz[3] = zr * (a20+yr2*a21);
    gqyz[4] = yr * (a20+zr2*a21);

    // unweighted dipole second reaction potential gradient tensor

    gux[5]  = xr * (3.0f*a11+xr2*a12);
    gux[6]  = yr * (a11+xr2*a12);
    gux[7]  = zr * (a11+xr2*a12);
    gux[8]  = xr * (a11+yr2*a12);
    gux[9]  = zr * xr * yr * a12;
    gux[10] = xr * (a11+zr2*a12);

    guy[5]  = yr * (a11+xr2*a12);
    guy[6]  = xr * (a11+yr2*a12);
    guy[7]  = gux[9];
    guy[8]  = yr * (3.0f*a11+yr2*a12);
    guy[9]  = zr * (a11+yr2*a12);
    guy[10] = yr * (a11+zr2*a12);

    guz[5]  = zr * (a11+xr2*a12);
    guz[6]  = gux[9];
    guz[7]  = xr * (a11+zr2*a12);
    guz[8]  = zr * (a11+yr2*a12);
    guz[9]  = yr * (a11+zr2*a12);
    guz[10] = zr * (3.0f*a11+zr2*a12);

    // generalized Kirkwood permanent reaction field built from atom J's multipoles:
    // dipole contraction plus half of each charge/quadrupole contraction

    const float potJx  = ck*gux[1] + qxxk*gux[5] + qyyk*gux[8] + qzzk*gux[10] + 2.0f*(qxyk*gux[6]+qxzk*gux[7] + qyzk*gux[9]);
    const float gradJx = ck*gc[2] + qxxk*gqxx[2] + qyyk*gqyy[2] + qzzk*gqzz[2] + 2.0f*(qxyk*gqxy[2]+qxzk*gqxz[2] + qyzk*gqyz[2]);
    outputField[0][0]  = uxk*gux[2] + uyk*gux[3] + uzk*gux[4] + 0.5f * potJx + 0.5f * gradJx;

    const float potJy  = ck*guy[1] + qxxk*guy[5] + qyyk*guy[8] + qzzk*guy[10] + 2.0f*(qxyk*guy[6]+qxzk*guy[7] + qyzk*guy[9]);
    const float gradJy = ck*gc[3] + qxxk*gqxx[3] + qyyk*gqyy[3] + qzzk*gqzz[3] + 2.0f*(qxyk*gqxy[3]+qxzk*gqxz[3] + qyzk*gqyz[3]);
    outputField[0][1]  = uxk*guy[2] + uyk*guy[3] + uzk*guy[4] + 0.5f * potJy + 0.5f * gradJy;

    const float potJz  = ck*guz[1] + qxxk*guz[5] + qyyk*guz[8] + qzzk*guz[10] + 2.0f*(qxyk*guz[6]+qxzk*guz[7] + qyzk*guz[9]);
    const float gradJz = ck*gc[4] + qxxk*gqxx[4] + qyyk*gqyy[4] + qzzk*gqzz[4] + 2.0f*(qxyk*gqxy[4]+qxzk*gqxz[4] + qyzk*gqyz[4]);
    outputField[0][2]  = uxk*guz[2] + uyk*guz[3] + uzk*guz[4] + 0.5f * potJz + 0.5f * gradJz;

    // same contraction with atom I's multipoles; the charge/quadrupole halves enter with a minus sign

    const float potIx  = ci*gux[1] + qxxi*gux[5] + qyyi*gux[8] + qzzi*gux[10] + 2.0f*(qxyi*gux[6]+qxzi*gux[7] + qyzi*gux[9]);
    const float gradIx = ci*gc[2] + qxxi*gqxx[2] + qyyi*gqyy[2] + qzzi*gqzz[2] + 2.0f*(qxyi*gqxy[2]+qxzi*gqxz[2] + qyzi*gqyz[2]);
    outputField[1][0]  = uxi*gux[2] + uyi*gux[3] + uzi*gux[4] - 0.5f * potIx - 0.5f * gradIx;

    const float potIy  = ci*guy[1] + qxxi*guy[5] + qyyi*guy[8] + qzzi*guy[10] + 2.0f*(qxyi*guy[6]+qxzi*guy[7] + qyzi*guy[9]);
    const float gradIy = ci*gc[3] + qxxi*gqxx[3] + qyyi*gqyy[3] + qzzi*gqzz[3] + 2.0f*(qxyi*gqxy[3]+qxzi*gqxz[3] + qyzi*gqyz[3]);
    outputField[1][1]  = uxi*guy[2] + uyi*guy[3] + uzi*guy[4] - 0.5f * potIy - 0.5f * gradIy;

    const float potIz  = ci*guz[1] + qxxi*guz[5] + qyyi*guz[8] + qzzi*guz[10] + 2.0f*(qxyi*guz[6]+qxzi*guz[7] + qyzi*guz[9]);
    const float gradIz = ci*gc[4] + qxxi*gqxx[4] + qyyi*gqyy[4] + qzzi*gqzz[4] + 2.0f*(qxyi*gqxy[4]+qxzi*gqxz[4] + qyzi*gqyz[4]);
    outputField[1][2]  = uxi*guz[2] + uyi*guz[3] + uzi*guz[4] - 0.5f * potIz - 0.5f * gradIz;
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaFixedEAndGkFields.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaFixedEAndGkFields.h"
/**---------------------------------------------------------------------------------------

   Compute the fixed electric fields and the generalized Kirkwood field.

   Steps: clear the work arrays (kClearFields_3), launch the N^2 pair kernel
   (the ByWarp variant when gpu->bOutputBufferPerWarp is set, the plain variant
   otherwise) writing into psWorkArray_3_1/2/3, then reduce those work arrays
   with kReduceEAndGkFields.

   Launch layout: gpu->sim.nonbond_blocks blocks of threadsPerBlock threads, with
   sizeof(FixedFieldParticle)*threadsPerBlock bytes of dynamic shared memory.

   @param amoebaGpu        amoebaGpu context (the OpenMM gpu context is taken from
                           amoebaGpu->gpuContext)

   --------------------------------------------------------------------------------------- */
void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // on first pass, set threads/block: the value returned by getThreadsPerBlock(),
    // capped by a per-architecture maximum
    // NOTE(review): the result lives in a function-local static, so it is computed once
    // per process and reused for every later context -- confirm that is intended when
    // contexts on different devices coexist.

    static unsigned int threadsPerBlock = 0;
    if( threadsPerBlock == 0 ){
        unsigned int maxThreads;
        if (gpu->sm_version >= SM_20)
            maxThreads = 256;
        else if (gpu->sm_version >= SM_12)
            maxThreads = 128;
        else
            maxThreads = 64;
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(FixedFieldParticle), gpu->sharedMemoryPerBlock ), maxThreads);
    }

    kClearFields_3( amoebaGpu, 3 );

    // the launch check sits inside each branch so the reported name is the kernel
    // that was actually launched

    if (gpu->bOutputBufferPerWarp){
        kCalculateAmoebaFixedEAndGkFieldN2ByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
                                                                           gpu->psWorkUnit->_pDevData,
                                                                           amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_2->_pDevData,
                                                                           amoebaGpu->psWorkArray_3_3->_pDevData );
        LAUNCHERROR("kCalculateAmoebaFixedEAndGkFieldN2ByWarp_kernel");
    } else {
        kCalculateAmoebaFixedEAndGkFieldN2_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
                                                                           gpu->psWorkUnit->_pDevData,
                                                                           amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_2->_pDevData,
                                                                           amoebaGpu->psWorkArray_3_3->_pDevData );
        LAUNCHERROR("kCalculateAmoebaFixedEAndGkFieldN2_kernel");
    }

    kReduceEAndGkFields( amoebaGpu );
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
/**
 * Pairwise fixed-multipole E-field (d- and p-scaled) and GK-field kernel.
 *
 * Work distribution: the launch is viewed as gridDim.x*blockDim.x/GRID groups of GRID
 * threads ("warps" in the code's naming); each group processes a contiguous slice
 * [pos, end) of the cSim.pInteractionCount[0] work units. Each work unit decodes to a
 * tile (x, y) plus an exclusion flag; thread tgx of the group owns atom x + tgx.
 *
 * Shared memory: one FixedFieldParticle per thread (dynamic, `sA`); the launcher must
 * pass sizeof(FixedFieldParticle)*blockDim.x bytes. The struct must have been compiled
 * with GK defined because bornR and gkField are used here.
 *
 * NOTE(review): threads read shared entries written by other threads of the same group
 * without any barrier; this presumably relies on implicit warp-synchronous execution --
 * confirm before building for architectures with independent thread scheduling.
 *
 * @param workUnit           encoded tile list (decoded with decodeCell)
 * @param outputEField       accumulation buffer for the d-scaled field
 * @param outputEFieldPolar  accumulation buffer for the p-scaled field
 * @param outputGkField      accumulation buffer for the GK field
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(256, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(128, 1)
#else
__launch_bounds__(64, 1)
#endif
void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
                            unsigned int* workUnit,
                            float* outputEField,
                            float* outputEFieldPolar,
                            float* outputGkField){

    extern __shared__ FixedFieldParticle sA[];

    unsigned int totalWarps       = gridDim.x*blockDim.x/GRID;
    unsigned int warp             = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits     = cSim.pInteractionCount[0];
    unsigned int pos              = warp*numWorkUnits/totalWarps;
    unsigned int end              = (warp+1)*numWorkUnits/totalWarps;

    // y index of the tile whose atoms currently sit in this group's slice of sA
    unsigned int lasty            = 0xFFFFFFFF;

    float4* atomCoord             = cSim.pPosq;
    float* labFrameDipole         = cAmoebaSim.pLabFrameDipole;
    float* labFrameQuadrupole     = cAmoebaSim.pLabFrameQuadrupole;
    float* bornRadii              = cSim.pBornRadii;

    float4 jCoord;
    float jBornRadius;
    float jDipole[3];
    float jQuadrupole[9];

    while (pos < end)
    {
        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // Extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx              = threadIdx.x & (GRID - 1);
        unsigned int tbx              = threadIdx.x - tgx;
        unsigned int tj               = tgx;

        FixedFieldParticle* psA       = &sA[tbx];
        unsigned int atomI            = x + tgx;
        FixedFieldParticle localParticle;
        loadFixedFieldShared( &localParticle, atomI, bornRadii );

        float4 iCoord                 = atomCoord[atomI];

        float eFieldSum[3];
        float eFieldPolarSum[3];
        float gkFieldSum[3];

        eFieldSum[0]                  = 0.0f;
        eFieldSum[1]                  = 0.0f;
        eFieldSum[2]                  = 0.0f;

        eFieldPolarSum[0]             = 0.0f;
        eFieldPolarSum[1]             = 0.0f;
        eFieldPolarSum[2]             = 0.0f;

        gkFieldSum[0]                 = 0.0f;
        gkFieldSum[1]                 = 0.0f;
        gkFieldSum[2]                 = 0.0f;

        if (x == y) // Handle diagonals uniquely at 50% efficiency
        {

            // load coordinates, charge, ...

            loadFixedFieldShared( &(sA[threadIdx.x]), atomI, bornRadii );

            if (!bExclusionFlag)
            {

                // this branch is never exercised since it includes the
                // interaction between atomI and itself which is always excluded

                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[2][3];

                    // load coords, charge, ...

                    loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField);

                    unsigned int match      = (atomI == (y + j)) ? 1 : 0;

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    eFieldSum[0]           += match ? 0.0f : ijField[0][0];
                    eFieldSum[1]           += match ? 0.0f : ijField[0][1];
                    eFieldSum[2]           += match ? 0.0f : ijField[0][2];

                    eFieldPolarSum[0]      += match ? 0.0f : ijField[0][0];
                    eFieldPolarSum[1]      += match ? 0.0f : ijField[0][1];
                    eFieldPolarSum[2]      += match ? 0.0f : ijField[0][2];

                    // GK field

                    calculateFixedGkFieldPairIxn_kernel( iCoord, jCoord,
                                                         &(labFrameDipole[atomI*3]),     jDipole,
                                                         &(labFrameQuadrupole[atomI*9]), jQuadrupole,
                                                         bornRadii[atomI]*jBornRadius, ijField);

                    gkFieldSum[0]          += match ? 0.0f : ijField[0][0];
                    gkFieldSum[1]          += match ? 0.0f : ijField[0][1];
                    gkFieldSum[2]          += match ? 0.0f : ijField[0][2];
                }

            }
            else  // bExclusion
            {
                unsigned int xi   = x >> GRIDBITS;
                unsigned int cell = xi + xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
                int  dScaleMask   = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 pScaleMask   = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

                for (unsigned int j = 0; j < GRID; j++)
                {

                    // load coords, charge, ...

                    float ijField[2][3];

                    loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField);

                    float dScaleVal;
                    float pScaleVal;
                    getMaskedDScaleFactor( j, dScaleMask, &dScaleVal );
                    getMaskedPScaleFactor( j, pScaleMask, &pScaleVal );

                    // nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
                    // by setting match flag

                    unsigned int match      = (atomI == (y + j)) ? 1 : 0;

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    eFieldSum[0]           += match ? 0.0f : dScaleVal*ijField[0][0];
                    eFieldSum[1]           += match ? 0.0f : dScaleVal*ijField[0][1];
                    eFieldSum[2]           += match ? 0.0f : dScaleVal*ijField[0][2];

                    eFieldPolarSum[0]      += match ? 0.0f : pScaleVal*ijField[0][0];
                    eFieldPolarSum[1]      += match ? 0.0f : pScaleVal*ijField[0][1];
                    eFieldPolarSum[2]      += match ? 0.0f : pScaleVal*ijField[0][2];

                    // GK field

                    calculateFixedGkFieldPairIxn_kernel( iCoord, jCoord,
                                                         &(labFrameDipole[atomI*3]),     jDipole,
                                                         &(labFrameQuadrupole[atomI*9]), jQuadrupole,
                                                         bornRadii[atomI]*jBornRadius, ijField);

                    // Skip the GK term when either atom index is beyond cSim.atoms.
                    // The partner in this loop is y + j; tj is never advanced on the
                    // diagonal path, so testing y + tj would only re-test atomI.

                    match                   = ((atomI >= cSim.atoms) || ((y+j) >= cSim.atoms)) ? 1 : 0;

                    gkFieldSum[0]          += match ? 0.0f : ijField[0][0];
                    gkFieldSum[1]          += match ? 0.0f : ijField[0][1];
                    gkFieldSum[2]          += match ? 0.0f : ijField[0][2];
                }
            }

            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, eFieldSum,      outputEField );
            load3dArrayBufferPerWarp( offset, eFieldPolarSum, outputEFieldPolar );
            load3dArrayBufferPerWarp( offset, gkFieldSum,     outputGkField );
#else
            unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( offset, eFieldSum,      outputEField );
            load3dArray( offset, eFieldPolarSum, outputEFieldPolar );
            load3dArray( offset, gkFieldSum,     outputGkField );
#endif

            // The shared tile was just overwritten with atoms y + tgx (x == y here), so
            // record that; otherwise a later off-diagonal tile whose y equals the old
            // lasty would skip its reload and use this tile's data by mistake.

            lasty = y;
        }
        else        // 100% utilization
        {

            // Read fixed atom data into registers and GRF

            if (lasty != y)
            {
                // load coordinates, charge, ...

                loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx), bornRadii );
            }

            // zero shared fields

            zeroFixedFieldParticleSharedField( &(sA[threadIdx.x]) );

            if (!bExclusionFlag)
            {
                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[2][3];

                    // load coords, charge, ...

                    loadFixedFieldParticleData( &(psA[tj]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField);

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    eFieldSum[0]           += ijField[0][0];
                    eFieldSum[1]           += ijField[0][1];
                    eFieldSum[2]           += ijField[0][2];

                    eFieldPolarSum[0]      += ijField[0][0];
                    eFieldPolarSum[1]      += ijField[0][1];
                    eFieldPolarSum[2]      += ijField[0][2];

                    // add to field at atomJ the field due atomI's charge/dipole/quadrupole

                    psA[tj].eField[0]      += ijField[1][0];
                    psA[tj].eField[1]      += ijField[1][1];
                    psA[tj].eField[2]      += ijField[1][2];

                    psA[tj].eFieldP[0]     += ijField[1][0];
                    psA[tj].eFieldP[1]     += ijField[1][1];
                    psA[tj].eFieldP[2]     += ijField[1][2];

                    // Gk field

                    calculateFixedGkFieldPairIxn_kernel( iCoord, jCoord,
                                                         &(labFrameDipole[atomI*3]),     jDipole,
                                                         &(labFrameQuadrupole[atomI*9]), jQuadrupole,
                                                         bornRadii[atomI]*jBornRadius, ijField);

                    gkFieldSum[0]          += ijField[0][0];
                    gkFieldSum[1]          += ijField[0][1];
                    gkFieldSum[2]          += ijField[0][2];

                    psA[tj].gkField[0]     += ijField[1][0];
                    psA[tj].gkField[1]     += ijField[1][1];
                    psA[tj].gkField[2]     += ijField[1][2];

                    // each thread visits a different shared entry on every iteration
                    tj = (tj + 1) & (GRID - 1);

                }
            }
            else  // bExclusion
            {
                // Read fixed atom data into registers and GRF

                unsigned int xi   = x >> GRIDBITS;
                unsigned int yi   = y >> GRIDBITS;
                unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
                int  dScaleMask   = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 pScaleMask   = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

                for (unsigned int j = 0; j < GRID; j++)
                {

                    // load coords, charge, ...

                    float ijField[2][3];

                    loadFixedFieldParticleData( &(psA[tj]), &jCoord, jDipole, jQuadrupole, &jBornRadius );
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField);

                    float dScaleVal;
                    float pScaleVal;
                    getMaskedDScaleFactor( tj, dScaleMask, &dScaleVal );
                    getMaskedPScaleFactor( tj, pScaleMask, &pScaleVal );

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    eFieldSum[0]           += dScaleVal*ijField[0][0];
                    eFieldSum[1]           += dScaleVal*ijField[0][1];
                    eFieldSum[2]           += dScaleVal*ijField[0][2];

                    eFieldPolarSum[0]      += pScaleVal*ijField[0][0];
                    eFieldPolarSum[1]      += pScaleVal*ijField[0][1];
                    eFieldPolarSum[2]      += pScaleVal*ijField[0][2];

                    // add to field at atomJ the field due atomI's charge/dipole/quadrupole

                    psA[tj].eField[0]      += dScaleVal*ijField[1][0];
                    psA[tj].eField[1]      += dScaleVal*ijField[1][1];
                    psA[tj].eField[2]      += dScaleVal*ijField[1][2];

                    psA[tj].eFieldP[0]     += pScaleVal*ijField[1][0];
                    psA[tj].eFieldP[1]     += pScaleVal*ijField[1][1];
                    psA[tj].eFieldP[2]     += pScaleVal*ijField[1][2];

                    // GK field

                    calculateFixedGkFieldPairIxn_kernel( iCoord, jCoord,
                                                         &(labFrameDipole[atomI*3]),     jDipole,
                                                         &(labFrameQuadrupole[atomI*9]), jQuadrupole,
                                                         bornRadii[atomI]*jBornRadius, ijField);

                    // GK term only when both atom indices are below cSim.atoms

                    if( (atomI < cSim.atoms) && ((y+tj) < cSim.atoms) ){

                        gkFieldSum[0]          += ijField[0][0];
                        gkFieldSum[1]          += ijField[0][1];
                        gkFieldSum[2]          += ijField[0][2];

                        psA[tj].gkField[0]     += ijField[1][0];
                        psA[tj].gkField[1]     += ijField[1][1];
                        psA[tj].gkField[2]     += ijField[1][2];
                    }

                    tj = (tj + 1) & (GRID - 1);

                }
            }

            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = 3*(x + tgx + warp * cSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, eFieldSum,      outputEField );
            load3dArrayBufferPerWarp( offset, eFieldPolarSum, outputEFieldPolar );
            load3dArrayBufferPerWarp( offset, gkFieldSum,     outputGkField );

            offset = 3*(y + tgx + warp*cSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eField,  outputEField );
            load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
            load3dArrayBufferPerWarp( offset, sA[threadIdx.x].gkField, outputGkField );
#else
            unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( offset, eFieldSum,      outputEField );
            load3dArray( offset, eFieldPolarSum, outputEFieldPolar );
            load3dArray( offset, gkFieldSum,     outputGkField );

            offset = 3*(y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( offset, sA[threadIdx.x].eField,  outputEField );
            load3dArray( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
            load3dArray( offset, sA[threadIdx.x].gkField, outputGkField );
#endif
            lasty = y;
        }

        pos++;
    }
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
// File-local (static) constant-memory copies of the simulation parameter structs.
// SetCalculateAmoebaCudaFixedEFieldSim() fills them with cudaMemcpyToSymbol and
// GetCalculateAmoebaCudaFixedEFieldSim() reads them back; device code in this
// translation unit accesses parameters through these symbols.
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
// Copy the host-side simulation structs (gpu->sim and amoebaGpu->amoebaSim) into this
// file's constant-memory symbols cSim and cAmoebaSim. Each copy is checked with RTERROR.
void SetCalculateAmoebaCudaFixedEFieldSim(amoebaGpuContext amoebaGpu)
{
    gpuContext hostGpu = amoebaGpu->gpuContext;

    cudaError_t rc = cudaMemcpyToSymbol(cSim, &hostGpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(rc, "SetCalculateAmoebaCudaFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");

    rc = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(rc, "SetCalculateAmoebaCudaFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
// Read this file's constant-memory symbols cSim and cAmoebaSim back into the host-side
// structs gpu->sim and amoebaGpu->amoebaSim. Each copy is checked with RTERROR.
void GetCalculateAmoebaCudaFixedEFieldSim(amoebaGpuContext amoebaGpu)
{
    gpuContext hostGpu = amoebaGpu->gpuContext;

    cudaError_t rc = cudaMemcpyFromSymbol(&hostGpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(rc, "GetCalculateAmoebaCudaFixedEFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");

    rc = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(rc, "GetCalculateAmoebaCudaFixedEFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
// Host helper: runs kReduceFields_kernel twice, once per field.
//   psWorkArray_3_1 -> psE_Field
//   psWorkArray_3_2 -> psE_FieldPolar
// Both launches use the same grid, block size, element count (3 per padded atom)
// and output-buffer count, all taken from gpu->sim.
static void kReduceE_Fields_kernel(amoebaGpuContext amoebaGpu )
{
    gpuContext gpu               = amoebaGpu->gpuContext;
    const cudaGmxSimulation& sim = gpu->sim;

    // direct field
    kReduceFields_kernel<<<sim.nonbond_blocks, sim.bsf_reduce_threads_per_block>>>(
                               sim.paddedNumberOfAtoms*3, sim.outputBuffers,
                               amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psE_Field->_pDevData, 0 );
    LAUNCHERROR("kReduceE_Fields1");

    // polar field
    kReduceFields_kernel<<<sim.nonbond_blocks, sim.bsf_reduce_threads_per_block>>>(
                               sim.paddedNumberOfAtoms*3, sim.outputBuffers,
                               amoebaGpu->psWorkArray_3_2->_pDevData, amoebaGpu->psE_FieldPolar->_pDevData, 0 );
    LAUNCHERROR("kReduceE_Fields2");
}
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
// GK is undefined first so that the GK-only parts of that header (guarded by
// #ifdef GK) are compiled out for this translation unit.
#undef GK
#include "kCalculateAmoebaCudaFixedFieldParticle.h"
// Include versions of the kernels for N^2 calculations.
// The kernel header is included twice; METHOD_NAME(a, b) token-pastes a different
// infix into the kernel name on each pass.
// Pass 1: "N2" infix -> kCalculateAmoebaFixedE_FieldN2Forces_kernel.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaFixedEField.h"
// Pass 2: "N2ByWarp" infix -> kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel.
// USE_OUTPUT_BUFFER_PER_WARP is defined before this second include and is not
// undefined afterwards (presumably it selects the per-warp output path inside
// the included header -- confirm against that header).
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaFixedEField.h"
/**---------------------------------------------------------------------------------------

   Host driver for the fixed-multipole E-field calculation (no GK field).

   Sequence: kClearFields_3( amoebaGpu, 2 ), one launch of the N2 pair kernel (the
   ByWarp variant when gpu->bOutputBufferPerWarp is set), then kReduceE_Fields_kernel().

   @param amoebaGpu        amoebaGpu context; the OpenMM gpu context is taken from
                           amoebaGpu->gpuContext

   --------------------------------------------------------------------------------------- */

void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
{
    gpuContext gpu = amoebaGpu->gpuContext;

    kClearFields_3( amoebaGpu, 2 );

    // The block size is determined on the first call only and cached afterwards.
    // NOTE(review): the kernel's __launch_bounds__ use the *_NONBOND_THREADS_PER_BLOCK
    // constants, whose values are not visible here -- confirm they are >= these limits.
    static unsigned int threadsPerBlock = 0;
    if( threadsPerBlock == 0 ){
        const unsigned int archLimit = (gpu->sm_version >= SM_20) ? 512 :
                                       (gpu->sm_version >= SM_12) ? 128 : 64;
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(FixedFieldParticle), gpu->sharedMemoryPerBlock ), archLimit);
    }

    // One FixedFieldParticle of dynamic shared memory per thread in the block.
    const size_t sharedBytes  = sizeof(FixedFieldParticle)*threadsPerBlock;
    unsigned int* workUnits   = gpu->psWorkUnit->_pDevData;
    float* eFieldBuffer       = amoebaGpu->psWorkArray_3_1->_pDevData;
    float* eFieldPolarBuffer  = amoebaGpu->psWorkArray_3_2->_pDevData;

    if (!gpu->bOutputBufferPerWarp){
        kCalculateAmoebaFixedE_FieldN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sharedBytes>>>(
                                                     workUnits, eFieldBuffer, eFieldPolarBuffer );
    } else {
        kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sharedBytes>>>(
                                                     workUnits, eFieldBuffer, eFieldPolarBuffer );
    }
    LAUNCHERROR("kCalculateAmoebaFixedE_FieldN2Forces_kernel");

    kReduceE_Fields_kernel( amoebaGpu );
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
/**
 * Pairwise fixed-multipole E-field kernel (d- and p-scaled fields, no GK term).
 *
 * Work distribution: the launch is viewed as gridDim.x*blockDim.x/GRID groups of GRID
 * threads ("warps" in the code's naming); each group processes a contiguous slice
 * [pos, end) of the cSim.pInteractionCount[0] work units. Each work unit decodes to a
 * tile (x, y) plus an exclusion flag; thread tgx of the group owns atom x + tgx.
 *
 * Shared memory: one FixedFieldParticle per thread (dynamic, `sA`); the launcher must
 * pass sizeof(FixedFieldParticle)*blockDim.x bytes.
 *
 * NOTE(review): threads read shared entries written by other threads of the same group
 * without any barrier; this presumably relies on implicit warp-synchronous execution --
 * confirm before building for architectures with independent thread scheduling.
 *
 * @param workUnit           encoded tile list (decoded with decodeCell)
 * @param outputEField       accumulation buffer for the d-scaled field
 * @param outputEFieldPolar  accumulation buffer for the p-scaled field
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
                            unsigned int* workUnit,
                            float* outputEField,
                            float* outputEFieldPolar){

    extern __shared__ FixedFieldParticle sA[];

    unsigned int totalWarps       = gridDim.x*blockDim.x/GRID;
    unsigned int warp             = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits     = cSim.pInteractionCount[0];
    unsigned int pos              = warp*numWorkUnits/totalWarps;
    unsigned int end              = (warp+1)*numWorkUnits/totalWarps;

    // y index of the tile whose atoms currently sit in this group's slice of sA
    unsigned int lasty            = 0xFFFFFFFF;

    while (pos < end)
    {
        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        unsigned int tgx              = threadIdx.x & (GRID - 1);
        unsigned int tbx              = threadIdx.x - tgx;
        unsigned int tj               = tgx;

        FixedFieldParticle* psA       = &sA[tbx];
        unsigned int atomI            = x + tgx;
        FixedFieldParticle localParticle;
        loadFixedFieldShared( &localParticle, atomI );

        float fieldSum[3];
        float fieldPolarSum[3];

        fieldSum[0]                   = 0.0f;
        fieldSum[1]                   = 0.0f;
        fieldSum[2]                   = 0.0f;

        fieldPolarSum[0]              = 0.0f;
        fieldPolarSum[1]              = 0.0f;
        fieldPolarSum[2]              = 0.0f;

        if (x == y)
        {

            // load coordinates, charge, ...

            loadFixedFieldShared( &(sA[threadIdx.x]), atomI );

            if (!bExclusionFlag)
            {

                // this branch is never exercised since it includes the
                // interaction between atomI and itself which is always excluded

                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[2][3];
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField);

                    unsigned int match      = (atomI == (y + j)) ? 1 : 0;

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]            += match ? 0.0f : ijField[0][0];
                    fieldSum[1]            += match ? 0.0f : ijField[0][1];
                    fieldSum[2]            += match ? 0.0f : ijField[0][2];

                    fieldPolarSum[0]       += match ? 0.0f : ijField[0][0];
                    fieldPolarSum[1]       += match ? 0.0f : ijField[0][1];
                    fieldPolarSum[2]       += match ? 0.0f : ijField[0][2];
                }

            }
            else  // bExclusion
            {
                unsigned int xi   = x >> GRIDBITS;
                unsigned int cell = xi + xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
                int  dScaleMask   = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 pScaleMask   = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[2][3];
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[j], ijField);

                    float dScaleVal;
                    float pScaleVal;
                    getMaskedDScaleFactor( j, dScaleMask, &dScaleVal );
                    getMaskedPScaleFactor( j, pScaleMask, &pScaleVal );

                    // nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
                    // by setting match flag

                    unsigned int match      = (atomI == (y + j)) ? 1 : 0;

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]            += match ? 0.0f : dScaleVal*ijField[0][0];
                    fieldSum[1]            += match ? 0.0f : dScaleVal*ijField[0][1];
                    fieldSum[2]            += match ? 0.0f : dScaleVal*ijField[0][2];

                    fieldPolarSum[0]       += match ? 0.0f : pScaleVal*ijField[0][0];
                    fieldPolarSum[1]       += match ? 0.0f : pScaleVal*ijField[0][1];
                    fieldPolarSum[2]       += match ? 0.0f : pScaleVal*ijField[0][2];
                }
            }

            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, fieldSum,      outputEField );
            load3dArrayBufferPerWarp( offset, fieldPolarSum, outputEFieldPolar );
#else
            unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( offset, fieldSum,      outputEField );
            load3dArray( offset, fieldPolarSum, outputEFieldPolar );
#endif

            // The shared tile was just overwritten with atoms y + tgx (x == y here), so
            // record that; otherwise a later off-diagonal tile whose y equals the old
            // lasty would skip its reload and use this tile's data by mistake.

            lasty = y;
        }
        else        // 100% utilization
        {

            // Read fixed atom data into registers and GRF

            if (lasty != y)
            {
                // load coordinates, charge, ...

                loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx) );
            }

            // zero shared fields

            zeroFixedFieldParticleSharedField( &(sA[threadIdx.x]) );

            if (!bExclusionFlag)
            {
                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[2][3];
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField);

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]            += ijField[0][0];
                    fieldSum[1]            += ijField[0][1];
                    fieldSum[2]            += ijField[0][2];

                    fieldPolarSum[0]       += ijField[0][0];
                    fieldPolarSum[1]       += ijField[0][1];
                    fieldPolarSum[2]       += ijField[0][2];

                    // add to field at atomJ the field due atomI's charge/dipole/quadrupole

                    psA[tj].eField[0]      += ijField[1][0];
                    psA[tj].eField[1]      += ijField[1][1];
                    psA[tj].eField[2]      += ijField[1][2];

                    psA[tj].eFieldP[0]     += ijField[1][0];
                    psA[tj].eFieldP[1]     += ijField[1][1];
                    psA[tj].eFieldP[2]     += ijField[1][2];

                    // each thread visits a different shared entry on every iteration
                    tj = (tj + 1) & (GRID - 1);

                }
            }
            else  // bExclusion
            {
                // Read fixed atom data into registers and GRF

                unsigned int xi   = x >> GRIDBITS;
                unsigned int yi   = y >> GRIDBITS;
                unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
                int  dScaleMask   = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 pScaleMask   = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[2][3];
                    calculateFixedEFieldPairIxn_kernel( localParticle, psA[tj], ijField);

                    float dScaleVal;
                    float pScaleVal;
                    getMaskedDScaleFactor( tj, dScaleMask, &dScaleVal );
                    getMaskedPScaleFactor( tj, pScaleMask, &pScaleVal );

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]            += dScaleVal*ijField[0][0];
                    fieldSum[1]            += dScaleVal*ijField[0][1];
                    fieldSum[2]            += dScaleVal*ijField[0][2];

                    fieldPolarSum[0]       += pScaleVal*ijField[0][0];
                    fieldPolarSum[1]       += pScaleVal*ijField[0][1];
                    fieldPolarSum[2]       += pScaleVal*ijField[0][2];

                    // add to field at atomJ the field due atomI's charge/dipole/quadrupole

                    psA[tj].eField[0]      += dScaleVal*ijField[1][0];
                    psA[tj].eField[1]      += dScaleVal*ijField[1][1];
                    psA[tj].eField[2]      += dScaleVal*ijField[1][2];

                    psA[tj].eFieldP[0]     += pScaleVal*ijField[1][0];
                    psA[tj].eFieldP[1]     += pScaleVal*ijField[1][1];
                    psA[tj].eFieldP[2]     += pScaleVal*ijField[1][2];

                    tj = (tj + 1) & (GRID - 1);

                }
            }

            // Write results

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, fieldSum,      outputEField );
            load3dArrayBufferPerWarp( offset, fieldPolarSum, outputEFieldPolar );

            offset = 3*(y + tgx + warp*cSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eField,  outputEField );
            load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#else
            unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( offset, fieldSum,      outputEField );
            load3dArray( offset, fieldPolarSum, outputEFieldPolar );

            offset = 3*(y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray( offset, sA[threadIdx.x].eField,  outputEField );
            load3dArray( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#endif
            lasty = y;
        }

        pos++;
    }
}
// Per-atom record used by the fixed-field kernels: the atom's position, charge and
// lab-frame multipoles plus the accumulators for the field contributions it receives.
// Member order is unchanged; optional members are compiled in by GK and
// INCLUDE_FIXED_FIELD_BUFFERS.
struct FixedFieldParticle {

    // position (x, y, z) and charge (q)
    float x, y, z, q;

    // lab-frame dipole components
    float labFrameDipole_X, labFrameDipole_Y, labFrameDipole_Z;

    // lab-frame quadrupole, unique components of the symmetric tensor
    float labFrameQuadrupole_XX, labFrameQuadrupole_XY, labFrameQuadrupole_XZ;
    float labFrameQuadrupole_YY, labFrameQuadrupole_YZ;
    float labFrameQuadrupole_ZZ;

    // damping parameters used when scaling the pair interaction
    float thole, damp;

    // accumulated field and polar field
    float eField[3];
    float eFieldP[3];

#ifdef GK
    // Born radius
    float bornR;

    // accumulated GK field
    float gkField[3];
#endif

#ifdef INCLUDE_FIXED_FIELD_BUFFERS
    float tempBuffer[3];
    float tempBufferP[3];
#endif

};
// Fill *sA with the data of atom `atomI`, read from the arrays referenced by cSim and
// cAmoebaSim: position/charge, lab-frame dipole, the six unique lab-frame quadrupole
// components, damping factor and thole value. With GK defined, the Born radius is also
// read from bornR[atomI]. The field accumulators of *sA are not touched.
//
// @param sA      destination record
// @param atomI   atom index used for every array lookup
// @param bornR   (GK builds only) array of Born radii indexed by atom
__device__ static void loadFixedFieldShared( struct FixedFieldParticle* sA, unsigned int atomI
#ifdef GK
                                             , float* bornR
#endif
)
{
    // coordinates & charge: a single float4 load; all four components come from it
    // rather than re-reading cSim.pPosq[atomI] for y, z and w
    const float4 posq          = cSim.pPosq[atomI];
    sA->x                      = posq.x;
    sA->y                      = posq.y;
    sA->z                      = posq.z;
    sA->q                      = posq.w;

    // lab dipole
    const float* dipole        = cAmoebaSim.pLabFrameDipole + atomI*3;
    sA->labFrameDipole_X       = dipole[0];
    sA->labFrameDipole_Y       = dipole[1];
    sA->labFrameDipole_Z       = dipole[2];

    // lab quadrupole: stored as 9 values per atom; only the upper triangle is kept
    const float* quadrupole    = cAmoebaSim.pLabFrameQuadrupole + atomI*9;
    sA->labFrameQuadrupole_XX  = quadrupole[0];
    sA->labFrameQuadrupole_XY  = quadrupole[1];
    sA->labFrameQuadrupole_XZ  = quadrupole[2];
    sA->labFrameQuadrupole_YY  = quadrupole[4];
    sA->labFrameQuadrupole_YZ  = quadrupole[5];
    sA->labFrameQuadrupole_ZZ  = quadrupole[8];

    // damping factor (.x) and thole (.y)
    const float2 dampingFactorAndThole = cAmoebaSim.pDampingFactorAndThole[atomI];
    sA->damp                   = dampingFactorAndThole.x;
    sA->thole                  = dampingFactorAndThole.y;

#ifdef GK
    sA->bornR                  = bornR[atomI];
#endif
}
// Unpack a FixedFieldParticle into separate outputs:
//   jCoord      <- (x, y, z, q)
//   jDipole     <- 3 dipole components
//   jQuadrupole <- full 3x3 quadrupole (9 values), expanded from the 6 stored components
//   bornR       <- Born radius (GK builds only)
__device__ static void loadFixedFieldParticleData( struct FixedFieldParticle* sA,
                                                   float4* jCoord, float* jDipole, float* jQuadrupole
#ifdef GK
                                                   , float* bornR
#endif
)
{
    // position and charge
    jCoord->x = sA->x;
    jCoord->y = sA->y;
    jCoord->z = sA->z;
    jCoord->w = sA->q;

    // dipole
    jDipole[0] = sA->labFrameDipole_X;
    jDipole[1] = sA->labFrameDipole_Y;
    jDipole[2] = sA->labFrameDipole_Z;

    // quadrupole: diagonal entries first, then each off-diagonal value into both
    // of its symmetric positions
    const float offXY = sA->labFrameQuadrupole_XY;
    const float offXZ = sA->labFrameQuadrupole_XZ;
    const float offYZ = sA->labFrameQuadrupole_YZ;

    jQuadrupole[0] = sA->labFrameQuadrupole_XX;
    jQuadrupole[4] = sA->labFrameQuadrupole_YY;
    jQuadrupole[8] = sA->labFrameQuadrupole_ZZ;

    jQuadrupole[1] = offXY;
    jQuadrupole[3] = offXY;

    jQuadrupole[2] = offXZ;
    jQuadrupole[6] = offXZ;

    jQuadrupole[5] = offYZ;
    jQuadrupole[7] = offYZ;

#ifdef GK
    *bornR = sA->bornR;
#endif
}
// Reset the field accumulators of *sA (eField, eFieldP and, in GK builds, gkField) to zero.
__device__ static void zeroFixedFieldParticleSharedField( struct FixedFieldParticle* sA )
{
    for( int axis = 0; axis < 3; axis++ ){
        sA->eField[axis]  = 0.0f;
        sA->eFieldP[axis] = 0.0f;
#ifdef GK
        sA->gkField[axis] = 0.0f;
#endif
    }
}
// Pair term of the fixed E-field calculation.
//
// On return:
//   field[0][0..2]  field at atomI due to atomJ's charge/dipole/quadrupole
//   field[1][0..2]  field at atomJ due to atomI's charge/dipole/quadrupole
//
// The r^-3, r^-5 and r^-7 factors are damped when the product of the two damp values
// is non-zero and r is below cAmoebaSim.scalingDistanceCutoff. No guard against r == 0
// is made here; callers discard the result for an atom paired with itself.
__device__ static void calculateFixedEFieldPairIxn_kernel( FixedFieldParticle& atomI, FixedFieldParticle& atomJ,
                                                           float field[2][3])
{
    // separation vector (J - I) and its length
    const float dx      = atomJ.x - atomI.x;
    const float dy      = atomJ.y - atomI.y;
    const float dz      = atomJ.z - atomI.z;
    const float dist    = SQRT( dx*dx + dy*dy + dz*dz );

    // inverse powers of the distance
    const float invR    = 1.0f/dist;
    const float invR2   = invR*invR;
    float scale3        = invR*invR2;
    float scale5        = 3.0f*scale3*invR2;
    float scale7        = 5.0f*scale5*invR2;

    // damping; dampArg keeps the plain product when no damping is applied, in which
    // case expTerm is zero and the factors below are left unchanged
    float dampArg       = atomI.damp*atomJ.damp;
    float expTerm       = 0.0f;
    if( dampArg != 0.0f && dist < cAmoebaSim.scalingDistanceCutoff ){
        const float ratio   = dist/dampArg;
        const float pGamma  = atomI.thole < atomJ.thole ? atomI.thole : atomJ.thole;
        dampArg             = ratio*ratio*ratio*pGamma;
        expTerm             = EXP( -dampArg );
    }

    scale3             *= 1.0f - expTerm;
    scale5             *= 1.0f - ( 1.0f + dampArg )*expTerm;
    scale7             *= 1.0f - ( 1.0f + dampArg + (0.6f*dampArg*dampArg))*expTerm;

    const float twoScale5 = scale5*2.0f;

    // ----- field at I from J's multipoles -----

    const float qJx     = dx*atomJ.labFrameQuadrupole_XX + dy*atomJ.labFrameQuadrupole_XY + dz*atomJ.labFrameQuadrupole_XZ;
    const float qJy     = dx*atomJ.labFrameQuadrupole_XY + dy*atomJ.labFrameQuadrupole_YY + dz*atomJ.labFrameQuadrupole_YZ;
    const float qJz     = dx*atomJ.labFrameQuadrupole_XZ + dy*atomJ.labFrameQuadrupole_YZ + dz*atomJ.labFrameQuadrupole_ZZ;

    const float dipDotJ  = dx*atomJ.labFrameDipole_X + dy*atomJ.labFrameDipole_Y + dz*atomJ.labFrameDipole_Z;
    const float quadDotJ = dx*qJx + dy*qJy + dz*qJz;
    const float radialJ  = -scale3*atomJ.q + scale5*dipDotJ - scale7*quadDotJ;

    field[0][0]         = dx*radialJ - scale3*atomJ.labFrameDipole_X + twoScale5*qJx;
    field[0][1]         = dy*radialJ - scale3*atomJ.labFrameDipole_Y + twoScale5*qJy;
    field[0][2]         = dz*radialJ - scale3*atomJ.labFrameDipole_Z + twoScale5*qJz;

    // ----- field at J from I's multipoles -----

    const float qIx     = dx*atomI.labFrameQuadrupole_XX + dy*atomI.labFrameQuadrupole_XY + dz*atomI.labFrameQuadrupole_XZ;
    const float qIy     = dx*atomI.labFrameQuadrupole_XY + dy*atomI.labFrameQuadrupole_YY + dz*atomI.labFrameQuadrupole_YZ;
    const float qIz     = dx*atomI.labFrameQuadrupole_XZ + dy*atomI.labFrameQuadrupole_YZ + dz*atomI.labFrameQuadrupole_ZZ;

    const float dipDotI  = dx*atomI.labFrameDipole_X + dy*atomI.labFrameDipole_Y + dz*atomI.labFrameDipole_Z;
    const float quadDotI = dx*qIx + dy*qIy + dz*qIz;
    const float radialI  = scale3*atomI.q + scale5*dipDotI + scale7*quadDotI;

    field[1][0]         = dx*radialI - scale3*atomI.labFrameDipole_X - twoScale5*qIx;
    field[1][1]         = dy*radialI - scale3*atomI.labFrameDipole_Y - twoScale5*qIy;
    field[1][2]         = dz*radialI - scale3*atomI.labFrameDipole_Z - twoScale5*qIz;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
// File-local __constant__ copies of the simulation parameter structs used by
// the device code in this file. They are filled from the host by
// SetCalculateAmoebaGrycukSim() and copied back by GetCalculateAmoebaGrycukSim().
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
// Upload the host-side simulation parameter structs into this file's
// __constant__ symbols: gpu->sim -> cSim and amoebaGpu->amoebaSim -> cAmoebaSim.
// Each copy's status is handed to RTERROR together with a descriptive message.
void SetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // base simulation parameters
    cudaError_t status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaGrycukSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");

    // AMOEBA-specific simulation parameters
    status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "SetCalculateAmoebaGrycukSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
// Read this file's __constant__ symbols back into the host-side structs:
// cSim -> gpu->sim and cAmoebaSim -> amoebaGpu->amoebaSim.
// Each copy's status is handed to RTERROR together with a descriptive message.
void GetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu)
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // base simulation parameters
    cudaError_t status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaGrycukSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");

    // AMOEBA-specific simulation parameters
    status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
    RTERROR(status, "GetCalculateAmoebaGrycukSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
// Per-atom data used by the Grycuk Born-radii pair calculation.
// x, y, z, radius and scaledRadius are filled by loadGrycukShared();
// bornSum is cleared by zeroGrycukParticleSharedField().
struct GrycukParticle {
// atom coordinates (copied from cSim.pPosq)
float x;
float y;
float z;
// atomic radius (copied from cSim.pObcData[].x)
float radius;
// radius * overlap scale factor (copied from cSim.pObcData[].y)
float scaledRadius;
// accumulator for the Born sum contributions
float bornSum;
};
// Fill a GrycukParticle with the data of atom 'atomI':
// coordinates from cSim.pPosq, radius and scaled radius from cSim.pObcData.
// The bornSum member is not touched here.
__device__ void loadGrycukShared( struct GrycukParticle* sA, unsigned int atomI )
{
    struct GrycukParticle& particle = *sA;

    // radius (x) and scaled radius (y)
    particle.radius       = cSim.pObcData[atomI].x;
    particle.scaledRadius = cSim.pObcData[atomI].y;

    // position
    particle.x = cSim.pPosq[atomI].x;
    particle.y = cSim.pPosq[atomI].y;
    particle.z = cSim.pPosq[atomI].z;
}
// Pairwise term of the Grycuk Born-radius sum: contribution of atomJ's scaled
// sphere to the sum accumulated for atomI.
//
// @param atomI    atom whose Born sum is being built (x, y, z, radius are read)
// @param atomJ    neighbouring atom (x, y, z, scaledRadius are read)
// @param bornSum  output; overwritten (not accumulated) with this pair's term.
//                 Set to 0 and left there when atomI.radius <= 0.
//
// NOTE(review): r is used as a divisor (u4r, l4r) without a zero check, so the
// caller presumably skips the atomI == atomJ pair -- confirm.
__device__ void calculateGrycukBornRadiiPairIxn_kernel( GrycukParticle& atomI, GrycukParticle& atomJ, float* bornSum ){
/*
* radius: radius (TINKER rsolv)
* scaledRadius: radius*overlap scale factor (TINKER rsolv*shct)
*
*/
float xr,yr,zr;
float r,r2;
float sk, sk2;
float lik, uik;
float lik3, uik3;
float l2, l4, lr, l4r;
float u2, u4, ur, u4r;
float term;
// decide whether to compute the current interaction;
// atoms with a non-positive radius receive no contribution
*bornSum = 0.0f;
if( atomI.radius <= 0.0f ){
return;
}
// separation vector (J - I), squared distance and distance
xr = atomJ.x - atomI.x;
yr = atomJ.y - atomI.y;
zr = atomJ.z - atomI.z;
r2 = xr*xr + yr*yr + zr*zr;
r = sqrtf(r2);
sk = atomJ.scaledRadius;
sk2 = sk*sk;
// extra term when radius_I + r < sk, i.e. atomI's sphere lies entirely inside
// atomJ's scaled sphere
if( (atomI.radius + r) < sk ){
lik = atomI.radius;
uik = sk - r;
lik3 = lik*lik*lik;
uik3 = uik*uik*uik;
*bornSum -= (1.0f/uik3 - 1.0f/lik3);
}
// upper integration limit is always r + sk; the lower limit depends on how the
// two spheres are positioned relative to each other
uik = r + sk;
if( (atomI.radius + r) < sk ){
lik = sk - r;
} else if( r < (atomI.radius + sk) ){
lik = atomI.radius;
} else {
lik = r - sk;
}
// powers of the limits, each combined with r
l2 = lik*lik;
l4 = l2*l2;
lr = lik*r;
l4r = l4*r;
u2 = uik*uik;
u4 = u2*u2;
ur = uik*r;
u4r = u4*r;
// difference of the same expression evaluated at the upper and lower limits
term = (3.0f*(r2-sk2)+6.0f*u2-8.0f*ur)/u4r - (3.0f*(r2-sk2)+6.0f*l2-8.0f*lr)/l4r;
*bornSum += term/16.0f;
}
// Clear the Born-sum accumulator of a GrycukParticle.
__device__ void zeroGrycukParticleSharedField( struct GrycukParticle* sA )
{
    struct GrycukParticle& particle = *sA;
    particle.bornSum = 0.0f;
}
// Reduce the per-buffer Born sums of every atom and convert the result into a
// Born radius, which is stored in cSim.pBornRadii.
//
// Each thread starts at its global thread index and advances by the total
// number of launched threads until it passes cSim.atoms. For an atom, the
// cSim.nonbondOutputBuffers partial sums (spaced cSim.stride floats apart in
// cSim.pBornSum) are added up, subtracted from 1/radius^3, and the result is
// raised to the power -1/3. A non-positive difference yields a radius of 1000.
__global__
__launch_bounds__(384, 1)
void kReduceGrycukGbsaBornSum_kernel()
{
    for( unsigned int atom = (blockIdx.x * blockDim.x + threadIdx.x); atom < cSim.atoms; atom += gridDim.x * blockDim.x )
    {
        // add up this atom's entry from every output buffer
        float total = 0.0f;
        float* pPartial = cSim.pBornSum + atom;
        for (int buffer = 0; buffer < cSim.nonbondOutputBuffers; buffer++)
        {
            total += *pPartial;
            pPartial += cSim.stride;
        }

        // 1/radius^3 minus the summed contributions
        float atomRadius = cSim.pObcData[atom].x;
        float inverseCube = 1.0f/(atomRadius*atomRadius*atomRadius);
        float difference = inverseCube - total;

        // convert to a Born radius, falling back to 1000 when the difference is not positive
        float bornRadius;
        if( difference <= 0.0f ){
            bornRadius = 1000.0f;
        } else {
            bornRadius = powf( difference, -1.0f/3.0f );
        }
        cSim.pBornRadii[atom] = bornRadius;
    }
}
/**---------------------------------------------------------------------------------------

   Host-side launcher for kReduceGrycukGbsaBornSum_kernel, which reduces the
   Born sums into Born radii. Uses sim.blocks blocks of 384 threads.

   @param amoebaGpu        amoebaGpu context

   --------------------------------------------------------------------------------------- */
void kReduceGrycukGbsaBornSum( amoebaGpuContext amoebaGpu )
{
    gpuContext gpu = amoebaGpu->gpuContext;

    kReduceGrycukGbsaBornSum_kernel<<<gpu->sim.blocks, 384>>>();
    LAUNCHERROR("kReduceGrycukGbsaBornSum");

    // Disabled debugging aid: writes the Born radii to a "BornRGry" file,
    // tagged with a per-call counter.
    if( 0 ){
        static int callId = 0;
        std::vector<int> fileId( 1, callId++ );
        VectorOfDoubleVectors outputVector;
        cudaLoadCudaFloatArray( gpu->natoms, 1, gpu->psBornRadii, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaWriteVectorOfDoubleVectorsToFile( "BornRGry", fileId, outputVector );
    }
}
// Include versions of the kernels for N^2 calculations.
// The same header is included twice with different macro settings so that two
// kernel variants are generated from one body:
//   1) USE_OUTPUT_BUFFER_PER_WARP undefined, METHOD_NAME pastes "N2" into the name
//   2) USE_OUTPUT_BUFFER_PER_WARP defined,   METHOD_NAME pastes "N2ByWarp" into the name
// (presumably yielding kCalculateAmoebaGrycukBornRadiiN2_kernel and
//  kCalculateAmoebaGrycukBornRadiiN2ByWarp_kernel, which are launched further
//  down in this file -- confirm against the header)
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaGrycukBornRadii.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaGrycukBornRadii.h"
/**---------------------------------------------------------------------------------------

   Compute Born radii using Grycuk algorithm

   Launches the N^2 Grycuk Born-radii kernel (the "ByWarp" variant when
   gpu->bOutputBufferPerWarp is set) on gpu->sim.nonbond_blocks blocks, with
   sizeof(GrycukParticle)*threadsPerBlock bytes of dynamic shared memory.

   The threads-per-block value is computed on the first call only and cached in
   a function-local static, so it is shared by all later calls.

   @param amoebaGpu        amoebaGpu context

   --------------------------------------------------------------------------------------- */
void kCalculateAmoebaGrycukBornRadii( amoebaGpuContext amoebaGpu )
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // on first pass, pick threads/block: the value returned by getThreadsPerBlock()
    // for this particle size, capped by a limit that depends on the SM version
    static unsigned int threadsPerBlock = 0;
    if( threadsPerBlock == 0 ){
        unsigned int maxThreads;
        if (gpu->sm_version >= SM_20) {
            maxThreads = 512;
        } else if (gpu->sm_version >= SM_12) {
            maxThreads = 128;
        } else {
            maxThreads = 64;
        }
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(GrycukParticle), gpu->sharedMemoryPerBlock ), maxThreads);
    }

    // one GrycukParticle of dynamic shared memory per thread
    if (gpu->bOutputBufferPerWarp){
        kCalculateAmoebaGrycukBornRadiiN2ByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData);
    } else {
        kCalculateAmoebaGrycukBornRadiiN2_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData);
    }

    // Label the launch check with this function's own name; the previous label
    // was shared with the chain-rule force launcher, which made failures of the
    // two launches indistinguishable.
    LAUNCHERROR("kCalculateAmoebaGrycukBornRadii");
}
// Born radius chain rule component for Grycuk
// Per-atom data used by the chain-rule pair calculation. All members except
// force[] are filled by loadGrycukChainRuleParticleShared(); force[] is cleared
// by zeroGrycukChainRuleParticleSharedField().
struct GrycukChainRuleParticle {
// atom coordinates (copied from cSim.pPosq)
float x;
float y;
float z;
// atomic radius (copied from cSim.pObcData[].x)
float radius;
// scaled radius (copied from cSim.pObcData[].y)
float scaledRadius;
// Born radius of the atom (copied from cSim.pBornRadii)
float bornRadius;
// Born force of the atom (copied from cSim.pBornForce)
float bornForce;
// force accumulator, components 0..2
float force[3];
};
// Fill a GrycukChainRuleParticle with the data of atom 'atomI':
// coordinates (cSim.pPosq), radius and scaled radius (cSim.pObcData),
// Born radius (cSim.pBornRadii) and Born force (cSim.pBornForce).
// The force[] accumulator is not touched here.
__device__ void loadGrycukChainRuleParticleShared( struct GrycukChainRuleParticle* sA, unsigned int atomI )
{
    struct GrycukChainRuleParticle& particle = *sA;

    // Born radius and Born force
    particle.bornRadius = cSim.pBornRadii[atomI];
    particle.bornForce  = cSim.pBornForce[atomI];

    // radius (x) and scaled radius (y)
    particle.radius       = cSim.pObcData[atomI].x;
    particle.scaledRadius = cSim.pObcData[atomI].y;

    // position
    particle.x = cSim.pPosq[atomI].x;
    particle.y = cSim.pPosq[atomI].y;
    particle.z = cSim.pPosq[atomI].z;
}
// Clear the three components of a GrycukChainRuleParticle's force accumulator.
__device__ void zeroGrycukChainRuleParticleSharedField( struct GrycukChainRuleParticle* sA )
{
    for( int axis = 0; axis < 3; axis++ ){
        sA->force[axis] = 0.0f;
    }
}
// Chain-rule force term for one atom pair: derivative of atomI's Born radius
// with respect to the I-J separation, multiplied by atomI's Born force.
//
// @param atomI  atom whose Born radius derivative is taken
//               (x, y, z, radius, bornRadius, bornForce are read)
// @param atomJ  neighbouring atom (x, y, z, scaledRadius are read)
// @param force  output, overwritten: (xr, yr, zr) * de, where (xr, yr, zr) is
//               the separation vector J - I
//
// NOTE(review): r and r2 are used as divisors without a zero check, so the
// caller presumably skips the atomI == atomJ pair -- confirm.
__device__ void calculateGrycukChainRulePairIxn_kernel( GrycukChainRuleParticle& atomI, GrycukChainRuleParticle& atomJ, float force[3] ){
const float pi = 3.1415926535897f;
float third = 1.0f/3.0f;
float pi43 = 4.0f*third*pi;
float lik, uik;
float lik4, uik4;
// prefactor: factor = -pi^(1/3) * 6^(2/3) / 9
// term = factor / ( (4/3)*pi / bornRadius^3 )^(4/3)
float factor = -powf(pi,third)*powf(6.0f,(2.0f*third))/9.0f;
float term = pi43/(atomI.bornRadius*atomI.bornRadius*atomI.bornRadius);
term = factor/powf( term, (4.0f*third) );
// separation vector (J - I), squared distance and distance
float xr = atomJ.x - atomI.x;
float yr = atomJ.y - atomI.y;
float zr = atomJ.z - atomI.z;
float sk = atomJ.scaledRadius;
float sk2 = sk*sk;
float r2 = xr*xr + yr*yr + zr*zr;
float r = sqrtf(r2);
float de = 0.0f;
// extra term when radius_I + r < sk, i.e. atomI's sphere lies entirely inside
// atomJ's scaled sphere
if( (atomI.radius + r) < sk ){
// NOTE(review): this declaration shadows the function-scope uik4 above; the
// outer uik4 is assigned separately before it is used further down
float uik4;
uik = sk - r;
uik4 = uik*uik;
uik4 = uik4*uik4;
de = -4.0f*pi/uik4;
}
// lower-limit term: the limit lik and the numerator depend on how the two
// spheres are positioned relative to each other
if( (atomI.radius + r) < sk){
lik = sk - r;
lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*pi*(sk2-4.0f*sk*r+17.0f*r2)/ (r2*lik4);
} else if( r < (atomI.radius +sk) ){
lik = atomI.radius;
lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*pi*(2.0f*atomI.radius*atomI.radius-sk2-r2)/ (r2*lik4);
} else {
lik = r - sk;
lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*pi*(sk2-4.0f*sk*r+r2)/ (r2*lik4);
}
// upper-limit term, with uik = r + sk
uik = r + sk;
uik4 = uik*uik;
uik4 = uik4*uik4;
de -= 0.25f*pi*(sk2+4.0f*sk*r+r2)/ (r2*uik4);
// scale by the prefactor and 1/r, then by atomI's Born force; the result
// multiplies the (unnormalized) separation vector
float dbr = term * de/r;
de = dbr*atomI.bornForce;
force[0] = xr*de;
force[1] = yr*de;
force[2] = zr*de;
}
// Include versions of the kernels for N^2 calculations.
// The chain-rule header is included twice with different macro settings so that
// two kernel variants are generated from one body:
//   1) USE_OUTPUT_BUFFER_PER_WARP undefined, METHOD_NAME pastes "N2" into the name
//   2) USE_OUTPUT_BUFFER_PER_WARP defined,   METHOD_NAME pastes "N2ByWarp" into the name
// METHOD_NAME is #undef'd first because it is still defined from the earlier
// Born-radii inclusion in this file.
// (presumably yielding kCalculateAmoebaGrycukChainRuleN2_kernel and
//  kCalculateAmoebaGrycukChainRuleN2ByWarp_kernel, which are launched further
//  down in this file -- confirm against the header)
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaGrycukChainRule.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaGrycukChainRule.h"
/**---------------------------------------------------------------------------------------

   Compute the Grycuk chain-rule contribution to the force.

   Launches the N^2 chain-rule kernel (the "ByWarp" variant when
   gpu->bOutputBufferPerWarp is set) on gpu->sim.nonbond_blocks blocks, with one
   GrycukChainRuleParticle of dynamic shared memory per thread. The
   threads-per-block value is computed on the first call and cached in a
   function-local static.

   @param amoebaGpu        amoebaGpu context

   --------------------------------------------------------------------------------------- */
void kCalculateGrycukGbsaForces2( amoebaGpuContext amoebaGpu )
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // first call only: choose threads/block, capped according to the SM version
    static unsigned int threadsPerBlock = 0;
    if( threadsPerBlock == 0 ){
        unsigned int maxThreads = 64;
        if (gpu->sm_version >= SM_20) {
            maxThreads = 512;
        } else if (gpu->sm_version >= SM_12) {
            maxThreads = 128;
        }
        threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(GrycukChainRuleParticle), gpu->sharedMemoryPerBlock ), maxThreads);
    }

    if (gpu->bOutputBufferPerWarp){
        kCalculateAmoebaGrycukChainRuleN2ByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukChainRuleParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData);
    } else {
        kCalculateAmoebaGrycukChainRuleN2_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukChainRuleParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData);
    }
    LAUNCHERROR("kCalculateAmoebaCudaGrycukN2Forces");

    // Disabled debugging aid: writes the reduced forces, Born forces and Born
    // radii to a "GryF" file, tagged with a per-call counter.
    if( 0 ){
        static int callId = 0;
        std::vector<int> fileId( 1, callId++ );
        VectorOfDoubleVectors outputVector;
        //cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        //cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psLabFrameDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );

        // scratch stream receiving the reduced float4 forces; released below
        CUDAStream<float>* reducedForces = new CUDAStream<float>(3*gpu->sim.paddedNumberOfAtoms, 1, "Temp1");
        reduceAndCopyCUDAStreamFloat4( gpu->psForce4, reducedForces, 1.0 );

        cudaLoadCudaFloatArray( gpu->natoms, 3, reducedForces, outputVector, gpu->psAtomIndex->_pSysData, 1.0f/4.184f );
        cudaLoadCudaFloatArray( gpu->natoms, 1, gpu->psBornForce, outputVector, gpu->psAtomIndex->_pSysData, 1.0f/4.184f );
        cudaLoadCudaFloatArray( gpu->natoms, 1, gpu->psBornRadii, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
        cudaWriteVectorOfDoubleVectorsToFile( "GryF", fileId, outputVector );
        delete reducedForces;
        //exit(0);
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment