Unverified Commit 73cac8e6 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #2085 from peastman/charges

Support multiple NonbondedForces in one System 
parents a885a6ae daa9de8a
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -268,7 +268,7 @@ public: ...@@ -268,7 +268,7 @@ public:
private: private:
class PmeIO; class PmeIO;
CpuPlatform::PlatformData& data; CpuPlatform::PlatformData& data;
int numParticles, num14; int numParticles, num14, posqIndex;
int **bonded14IndexArray; int **bonded14IndexArray;
double **bonded14ParamArray; double **bonded14ParamArray;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient; double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
...@@ -277,6 +277,7 @@ private: ...@@ -277,6 +277,7 @@ private:
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams; std::vector<std::pair<float, float> > particleParams;
std::vector<float> C6params; std::vector<float> C6params;
std::vector<float> charges;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNonbondedForce* nonbonded; CpuNonbondedForce* nonbonded;
Kernel optimizedPme, optimizedDispersionPme; Kernel optimizedPme, optimizedDispersionPme;
...@@ -363,7 +364,9 @@ public: ...@@ -363,7 +364,9 @@ public:
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force); void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private: private:
CpuPlatform::PlatformData& data; CpuPlatform::PlatformData& data;
int posqIndex;
std::vector<std::pair<float, float> > particleParams; std::vector<std::pair<float, float> > particleParams;
std::vector<float> charges;
CpuGBSAOBCForce obc; CpuGBSAOBCForce obc;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -92,6 +92,7 @@ public: ...@@ -92,6 +92,7 @@ public:
PlatformData(int numParticles, int numThreads, bool deterministicForces); PlatformData(int numParticles, int numThreads, bool deterministicForces);
~PlatformData(); ~PlatformData();
void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList); void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList);
int requestPosqIndex();
AlignedArray<float> posq; AlignedArray<float> posq;
std::vector<AlignedArray<float> > threadForce; std::vector<AlignedArray<float> > threadForce;
ThreadPool threads; ThreadPool threads;
...@@ -101,6 +102,7 @@ public: ...@@ -101,6 +102,7 @@ public:
CpuNeighborList* neighborList; CpuNeighborList* neighborList;
double cutoff, paddedCutoff; double cutoff, paddedCutoff;
bool anyExclusions, deterministicForces; bool anyExclusions, deterministicForces;
int currentPosqIndex, nextPosqIndex;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
}; };
......
...@@ -138,6 +138,19 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& ...@@ -138,6 +138,19 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy; return 0.5*energy;
} }
/**
* Copy particle charges into the fourth element of the posq array.
*/
static void copyChargesToPosq(ContextImpl& context, const vector<float>& charges, int index) {
CpuPlatform::PlatformData& data = CpuPlatform::getPlatformData(context);
if (index == data.currentPosqIndex)
return;
data.currentPosqIndex = index;
AlignedArray<float>& posq = data.posq;
for (int i = 0; i < charges.size(); i++)
posq[4*i+3] = charges[i];
}
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) : CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) { CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel. // Create a Reference platform version of this kernel.
...@@ -530,6 +543,7 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() { ...@@ -530,6 +543,7 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
} }
void CpuCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) { void CpuCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
posqIndex = data.requestPosqIndex();
// Identify which exceptions are 1-4 interactions. // Identify which exceptions are 1-4 interactions.
...@@ -556,12 +570,13 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -556,12 +570,13 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++) for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3]; bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles); particleParams.resize(numParticles);
charges.resize(numParticles);
C6params.resize(numParticles); C6params.resize(numParticles);
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge; charges[i] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second; C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
...@@ -660,6 +675,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -660,6 +675,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
} }
} }
} }
copyChargesToPosq(context, charges, posqIndex);
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<Vec3>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
...@@ -726,11 +742,12 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -726,11 +742,12 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the values. // Record the values.
posqIndex = data.requestPosqIndex();
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge; charges[i] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
...@@ -964,12 +981,14 @@ CpuCalcGBSAOBCForceKernel::~CpuCalcGBSAOBCForceKernel() { ...@@ -964,12 +981,14 @@ CpuCalcGBSAOBCForceKernel::~CpuCalcGBSAOBCForceKernel() {
} }
void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) { void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
posqIndex = data.requestPosqIndex();
int numParticles = system.getNumParticles(); int numParticles = system.getNumParticles();
particleParams.resize(numParticles); particleParams.resize(numParticles);
charges.resize(numParticles);
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, scalingFactor; double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor); force.getParticleParameters(i, charge, radius, scalingFactor);
data.posq[4*i+3] = (float) charge; charges[i] = (float) charge;
radius -= 0.009; radius -= 0.009;
particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius)); particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius));
} }
...@@ -983,6 +1002,7 @@ void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCFo ...@@ -983,6 +1002,7 @@ void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCFo
} }
double CpuCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
copyChargesToPosq(context, charges, posqIndex);
if (data.isPeriodic) { if (data.isPeriodic) {
Vec3& boxSize = extractBoxSize(context); Vec3& boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]}; float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
...@@ -1000,10 +1020,11 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co ...@@ -1000,10 +1020,11 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co
// Record the values. // Record the values.
posqIndex = data.requestPosqIndex();
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, scalingFactor; double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor); force.getParticleParameters(i, charge, radius, scalingFactor);
data.posq[4*i+3] = (float) charge; charges[i] = (float) charge;
radius -= 0.009; radius -= 0.009;
particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius)); particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius));
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. * * Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -147,7 +147,7 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl& ...@@ -147,7 +147,7 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
} }
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads), CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads),
deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false) { deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false), currentPosqIndex(-1), nextPosqIndex(0) {
numThreads = threads.getNumThreads(); numThreads = threads.getNumThreads();
threadForce.resize(numThreads); threadForce.resize(numThreads);
for (int i = 0; i < numThreads; i++) for (int i = 0; i < numThreads; i++)
...@@ -184,3 +184,7 @@ void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, doubl ...@@ -184,3 +184,7 @@ void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, doubl
else if (!anyExclusions) else if (!anyExclusions)
exclusions = exclusionList; exclusions = exclusionList;
} }
int CpuPlatform::PlatformData::requestPosqIndex() {
return nextPosqIndex++;
}
\ No newline at end of file
...@@ -503,6 +503,11 @@ public: ...@@ -503,6 +503,11 @@ public:
* Set the particle charges. These are packed into the fourth element of the posq array. * Set the particle charges. These are packed into the fourth element of the posq array.
*/ */
void setCharges(const std::vector<double>& charges); void setCharges(const std::vector<double>& charges);
/**
* Request to use the fourth element of the posq array for storing charges. Since only one force can
* do that, this returns true the first time it is called, and false on all subsequent calls.
*/
bool requestPosqCharges();
/** /**
* Get the thread used by this context for executing parallel computations. * Get the thread used by this context for executing parallel computations.
*/ */
...@@ -626,7 +631,7 @@ private: ...@@ -626,7 +631,7 @@ private:
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, forcesValid; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, forcesValid, hasAssignedPosqCharges;
bool isLinkedContext; bool isLinkedContext;
std::string compiler, tempDir, cacheDir, gpuArchitecture; std::string compiler, tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
......
...@@ -675,6 +675,7 @@ private: ...@@ -675,6 +675,7 @@ private:
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
bool hasInitializedFFT; bool hasInitializedFFT;
CudaArray charges;
CudaArray sigmaEpsilon; CudaArray sigmaEpsilon;
CudaArray exceptionParams; CudaArray exceptionParams;
CudaArray cosSinSums; CudaArray cosSinSums;
...@@ -719,7 +720,7 @@ private: ...@@ -719,7 +720,7 @@ private:
int interpolateForceThreads; int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ; int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME; bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME, usePosqCharges;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
...@@ -815,6 +816,7 @@ private: ...@@ -815,6 +816,7 @@ private:
int maxTiles; int maxTiles;
CudaContext& cu; CudaContext& cu;
ForceInfo* info; ForceInfo* info;
CudaArray charges;
CudaArray params; CudaArray params;
CudaArray bornSum; CudaArray bornSum;
CudaArray bornRadii; CudaArray bornRadii;
......
...@@ -107,8 +107,8 @@ static int executeInWindows(const string &command) { ...@@ -107,8 +107,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData, CudaContext* originalContext) : system(system), currentStream(0), const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData, CudaContext* originalContext) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasAssignedPosqCharges(false),
pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { hasCompilerKernel(false), isNvccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
// Determine what compiler to use. // Determine what compiler to use.
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
...@@ -301,6 +301,11 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -301,6 +301,11 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed4"] = "make_float4"; compilationDefines["make_mixed4"] = "make_float4";
} }
posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0)); posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0));
atomIndexDevice.initialize<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
// Create utility kernels that are used in multiple places. // Create utility kernels that are used in multiple places.
...@@ -475,11 +480,6 @@ void CudaContext::initialize() { ...@@ -475,11 +480,6 @@ void CudaContext::initialize() {
energyParamDerivBuffer.initialize<float>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer"); energyParamDerivBuffer.initialize<float>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
addAutoclearBuffer(energyParamDerivBuffer); addAutoclearBuffer(energyParamDerivBuffer);
} }
atomIndexDevice.initialize<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
nonbonded->initialize(system); nonbonded->initialize(system);
} }
...@@ -887,14 +887,14 @@ void CudaContext::setCharges(const vector<double>& charges) { ...@@ -887,14 +887,14 @@ void CudaContext::setCharges(const vector<double>& charges) {
if (!chargeBuffer.isInitialized()) if (!chargeBuffer.isInitialized())
chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer"); chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) { if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer(); vector<double> c(numAtoms);
for (int i = 0; i < charges.size(); i++) for (int i = 0; i < numAtoms; i++)
c[i] = charges[i]; c[i] = charges[i];
chargeBuffer.upload(c); chargeBuffer.upload(c);
} }
else { else {
float* c = (float*) getPinnedBuffer(); vector<float> c(numAtoms);
for (int i = 0; i < charges.size(); i++) for (int i = 0; i < numAtoms; i++)
c[i] = (float) charges[i]; c[i] = (float) charges[i];
chargeBuffer.upload(c); chargeBuffer.upload(c);
} }
...@@ -902,6 +902,12 @@ void CudaContext::setCharges(const vector<double>& charges) { ...@@ -902,6 +902,12 @@ void CudaContext::setCharges(const vector<double>& charges) {
executeKernel(setChargesKernel, args, numAtoms); executeKernel(setChargesKernel, args, numAtoms);
} }
bool CudaContext::requestPosqCharges() {
bool allow = !hasAssignedPosqCharges;
hasAssignedPosqCharges = true;
return allow;
}
/** /**
* This class ensures that atom reordering doesn't break virtual sites. * This class ensures that atom reordering doesn't break virtual sites.
*/ */
......
This diff is collapsed.
...@@ -4,7 +4,11 @@ ...@@ -4,7 +4,11 @@
unsigned int includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection); unsigned int includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r; const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR); const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real prefactor = 138.935456f*posq1.w*posq2.w*invR; #if HAS_COULOMB
const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
#else
const real prefactor = 0.0f;
#endif
#ifdef USE_DOUBLE_PRECISION #ifdef USE_DOUBLE_PRECISION
const real erfcAlphaR = erfc(alphaR); const real erfcAlphaR = erfc(alphaR);
...@@ -30,7 +34,7 @@ ...@@ -30,7 +34,7 @@
const real dar6 = dar4*dar2; const real dar6 = dar4*dar2;
const real invR2 = invR*invR; const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2); const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2; const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y; const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6; const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4; const real eprefac = 1.0f + dar2 + 0.5f*dar4;
...@@ -40,6 +44,7 @@ ...@@ -40,6 +44,7 @@
if (needCorrection) { if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) { if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = ERF(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles. real erfAlphaR = ERF(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
...@@ -47,8 +52,9 @@ ...@@ -47,8 +52,9 @@
} }
else { else {
includeInteraction = false; includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w; tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
} }
#endif
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
#if DO_LJPME #if DO_LJPME
// The multiplicative grid term // The multiplicative grid term
...@@ -59,11 +65,11 @@ ...@@ -59,11 +65,11 @@
} }
else { else {
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x; real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real eps = sigmaEpsilon1.y*sigmaEpsilon2.y; real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps; real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f); real ljEnergy = epssig6*(sig6 - 1.0f);
...@@ -108,11 +114,11 @@ ...@@ -108,11 +114,11 @@
#endif #endif
real tempForce = 0.0f; real tempForce = 0.0f;
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x; real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y); real epssig6 = sig6*(SIGMA_EPSILON1.y*SIGMA_EPSILON2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = includeInteraction ? epssig6*(sig6 - 1) : 0; real ljEnergy = includeInteraction ? epssig6*(sig6 - 1) : 0;
#if USE_LJ_SWITCH #if USE_LJ_SWITCH
...@@ -128,11 +134,11 @@ ...@@ -128,11 +134,11 @@
#endif #endif
#if HAS_COULOMB #if HAS_COULOMB
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const real prefactor = 138.935456f*posq1.w*posq2.w; const real prefactor = 138.935456f*CHARGE1*CHARGE2;
tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2); tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += includeInteraction ? prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C) : 0; tempEnergy += includeInteraction ? prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C) : 0;
#else #else
const real prefactor = 138.935456f*posq1.w*posq2.w*invR; const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
tempForce += prefactor; tempForce += prefactor;
tempEnergy += includeInteraction ? prefactor : 0; tempEnergy += includeInteraction ? prefactor : 0;
#endif #endif
......
...@@ -66,7 +66,7 @@ typedef struct { ...@@ -66,7 +66,7 @@ typedef struct {
/** /**
* Compute the Born sum. * Compute the Born sum.
*/ */
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params, extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const real* __restrict__ charge, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
...@@ -92,6 +92,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -92,6 +92,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
real bornSum = 0; real bornSum = 0;
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
...@@ -99,7 +100,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -99,7 +100,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
localData[threadIdx.x].x = posq1.x; localData[threadIdx.x].x = posq1.x;
localData[threadIdx.x].y = posq1.y; localData[threadIdx.x].y = posq1.y;
localData[threadIdx.x].z = posq1.z; localData[threadIdx.x].z = posq1.z;
localData[threadIdx.x].q = posq1.w; localData[threadIdx.x].q = charge1;
localData[threadIdx.x].radius = params1.x; localData[threadIdx.x].radius = params1.x;
localData[threadIdx.x].scaledRadius = params1.y; localData[threadIdx.x].scaledRadius = params1.y;
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
...@@ -138,7 +139,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -138,7 +139,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
localData[threadIdx.x].x = tempPosq.x; localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y; localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z; localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w; localData[threadIdx.x].q = charge[j];
float2 tempParams = global_params[j]; float2 tempParams = global_params[j];
localData[threadIdx.x].radius = tempParams.x; localData[threadIdx.x].radius = tempParams.x;
localData[threadIdx.x].scaledRadius = tempParams.y; localData[threadIdx.x].scaledRadius = tempParams.y;
...@@ -262,6 +263,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -262,6 +263,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
// Load atom data for this tile. // Load atom data for this tile.
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx]; unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
...@@ -274,7 +276,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -274,7 +276,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
localData[threadIdx.x].x = tempPosq.x; localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y; localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z; localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w; localData[threadIdx.x].q = charge[j];
float2 tempParams = global_params[j]; float2 tempParams = global_params[j];
localData[threadIdx.x].radius = tempParams.x; localData[threadIdx.x].radius = tempParams.x;
localData[threadIdx.x].scaledRadius = tempParams.y; localData[threadIdx.x].scaledRadius = tempParams.y;
...@@ -400,7 +402,7 @@ typedef struct { ...@@ -400,7 +402,7 @@ typedef struct {
*/ */
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce, extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii, bool needEnergy, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ charge, const real* __restrict__ global_bornRadii, bool needEnergy,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
...@@ -427,6 +429,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -427,6 +429,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 force = make_real4(0); real4 force = make_real4(0);
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1]; real bornRadius1 = global_bornRadii[atom1];
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
...@@ -434,12 +437,13 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -434,12 +437,13 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
localData[threadIdx.x].x = posq1.x; localData[threadIdx.x].x = posq1.x;
localData[threadIdx.x].y = posq1.y; localData[threadIdx.x].y = posq1.y;
localData[threadIdx.x].z = posq1.z; localData[threadIdx.x].z = posq1.z;
localData[threadIdx.x].q = posq1.w; localData[threadIdx.x].q = charge1;
localData[threadIdx.x].bornRadius = bornRadius1; localData[threadIdx.x].bornRadius = bornRadius1;
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q); real3 pos2 = make_real3(localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real charge2 = localData[tbx+j].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -455,7 +459,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -455,7 +459,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -485,7 +489,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -485,7 +489,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
localData[threadIdx.x].x = tempPosq.x; localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y; localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z; localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w; localData[threadIdx.x].q = charge[j];
localData[threadIdx.x].bornRadius = global_bornRadii[j]; localData[threadIdx.x].bornRadius = global_bornRadii[j];
localData[threadIdx.x].fx = 0.0f; localData[threadIdx.x].fx = 0.0f;
localData[threadIdx.x].fy = 0.0f; localData[threadIdx.x].fy = 0.0f;
...@@ -494,8 +498,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -494,8 +498,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real3 pos2 = make_real3(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real charge2 = localData[tbx+tj].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -511,7 +516,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -511,7 +516,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -617,6 +622,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -617,6 +622,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
// Load atom data for this tile. // Load atom data for this tile.
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1]; real bornRadius1 = global_bornRadii[atom1];
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx]; unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
...@@ -629,7 +635,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -629,7 +635,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
localData[threadIdx.x].x = tempPosq.x; localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y; localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z; localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w; localData[threadIdx.x].q = charge[j];
localData[threadIdx.x].bornRadius = global_bornRadii[j]; localData[threadIdx.x].bornRadius = global_bornRadii[j];
localData[threadIdx.x].fx = 0.0f; localData[threadIdx.x].fx = 0.0f;
localData[threadIdx.x].fy = 0.0f; localData[threadIdx.x].fy = 0.0f;
...@@ -648,8 +654,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -648,8 +654,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real3 pos2 = make_real3(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real charge2 = localData[tbx+tj].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
...@@ -660,7 +667,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -660,7 +667,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -693,8 +700,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -693,8 +700,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real3 pos2 = make_real3(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real charge2 = localData[tbx+tj].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -710,7 +718,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -710,7 +718,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
{ {
real invRSquaredOver4 = 0.25f*invR*invR; real invRSquaredOver4 = 0.25f*invR*invR;
real rScaledRadiusJ = r+obcParams2.y; real rScaledRadiusJ = r+OBC_PARAMS2.y;
real rScaledRadiusI = r+obcParams1.y; real rScaledRadiusI = r+OBC_PARAMS1.y;
real l_ijJ = RECIP(max(obcParams1.x, fabs(r-obcParams2.y))); real l_ijJ = RECIP(max(OBC_PARAMS1.x, fabs(r-OBC_PARAMS2.y)));
real l_ijI = RECIP(max(obcParams2.x, fabs(r-obcParams1.y))); real l_ijI = RECIP(max(OBC_PARAMS2.x, fabs(r-OBC_PARAMS1.y)));
real u_ijJ = RECIP(rScaledRadiusJ); real u_ijJ = RECIP(rScaledRadiusJ);
real u_ijI = RECIP(rScaledRadiusI); real u_ijI = RECIP(rScaledRadiusI);
real l_ij2J = l_ijJ*l_ijJ; real l_ij2J = l_ijJ*l_ijJ;
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
real t1I = LOG(u_ijI*RECIP(l_ijI)); real t1I = LOG(u_ijI*RECIP(l_ijI));
real t2J = (l_ij2J-u_ij2J); real t2J = (l_ij2J-u_ij2J);
real t2I = (l_ij2I-u_ij2I); real t2I = (l_ij2I-u_ij2I);
real term1 = (0.5f*(0.25f+obcParams2.y*obcParams2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR; real term1 = (0.5f*(0.25f+OBC_PARAMS2.y*OBC_PARAMS2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR;
real term2 = (0.5f*(0.25f+obcParams1.y*obcParams1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR; real term2 = (0.5f*(0.25f+OBC_PARAMS1.y*OBC_PARAMS1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR;
real tempdEdR = (obcParams1.x < rScaledRadiusJ ? bornForce1*term1/0x100000000 : 0); real tempdEdR = (OBC_PARAMS1.x < rScaledRadiusJ ? BORN_FORCE1*term1/0x100000000 : 0);
tempdEdR += (obcParams2.x < rScaledRadiusI ? bornForce2*term2/0x100000000 : 0); tempdEdR += (OBC_PARAMS2.x < rScaledRadiusI ? BORN_FORCE2*term2/0x100000000 : 0);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int includeInteraction = (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED); unsigned int includeInteraction = (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED);
#else #else
......
...@@ -3,8 +3,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int ...@@ -3,8 +3,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Compute the index of the grid point each atom is associated with. // Compute the index of the grid point each atom is associated with.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real4 pos = posq[i]; real4 pos = posq[atom];
APPLY_PERIODIC_TO_POS(pos) APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
...@@ -15,7 +15,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int ...@@ -15,7 +15,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
int3 gridIndex = make_int3(((int) t.x) % GRID_SIZE_X, int3 gridIndex = make_int3(((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y, ((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z); ((int) t.z) % GRID_SIZE_Z);
pmeAtomGridIndex[i] = make_int2(i, gridIndex.x*GRID_SIZE_Y*GRID_SIZE_Z+gridIndex.y*GRID_SIZE_Z+gridIndex.z); pmeAtomGridIndex[atom] = make_int2(atom, gridIndex.x*GRID_SIZE_Y*GRID_SIZE_Z+gridIndex.y*GRID_SIZE_Z+gridIndex.z);
} }
} }
...@@ -24,6 +24,8 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real ...@@ -24,6 +24,8 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME #ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon , const float2* __restrict__ sigmaEpsilon
#else
, const real* __restrict__ charges
#endif #endif
) { ) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
...@@ -39,7 +41,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real ...@@ -39,7 +41,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
const float2 sigEps = sigmaEpsilon[atom]; const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y; const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else #else
const real charge = pos.w; const real charge = CHARGE;
#endif #endif
if (charge == 0) if (charge == 0)
continue; continue;
...@@ -253,6 +255,8 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __ ...@@ -253,6 +255,8 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME #ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon , const float2* __restrict__ sigmaEpsilon
#else
, const real* __restrict__ charges
#endif #endif
) { ) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
...@@ -330,7 +334,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __ ...@@ -330,7 +334,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
const float2 sigEps = sigmaEpsilon[atom]; const float2 sigEps = sigmaEpsilon[atom];
real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y; real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else #else
real q = pos.w*EPSILON_FACTOR; real q = CHARGE*EPSILON_FACTOR;
#endif #endif
real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x); real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y); real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
......
...@@ -618,6 +618,11 @@ public: ...@@ -618,6 +618,11 @@ public:
* Set the particle charges. These are packed into the fourth element of the posq array. * Set the particle charges. These are packed into the fourth element of the posq array.
*/ */
void setCharges(const std::vector<double>& charges); void setCharges(const std::vector<double>& charges);
/**
* Request to use the fourth element of the posq array for storing charges. Since only one force can
* do that, this returns true the first time it is called, and false on all subsequent calls.
*/
bool requestPosqCharges();
/** /**
* Get the thread used by this context for executing parallel computations. * Get the thread used by this context for executing parallel computations.
*/ */
...@@ -738,7 +743,7 @@ private: ...@@ -738,7 +743,7 @@ private:
int numThreadBlocks; int numThreadBlocks;
int numForceBuffers; int numForceBuffers;
int simdWidth; int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic, forcesValid; bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic, forcesValid, hasAssignedPosqCharges;
mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ; mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble; mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
......
...@@ -652,6 +652,7 @@ private: ...@@ -652,6 +652,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLArray charges;
OpenCLArray sigmaEpsilon; OpenCLArray sigmaEpsilon;
OpenCLArray exceptionParams; OpenCLArray exceptionParams;
OpenCLArray cosSinSums; OpenCLArray cosSinSums;
...@@ -698,7 +699,7 @@ private: ...@@ -698,7 +699,7 @@ private:
double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha; double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ; int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue, doLJPME; bool hasCoulomb, hasLJ, usePmeQueue, doLJPME, usePosqCharges;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
...@@ -795,6 +796,7 @@ private: ...@@ -795,6 +796,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
ForceInfo* info; ForceInfo* info;
OpenCLArray params; OpenCLArray params;
OpenCLArray charges;
OpenCLArray bornSum; OpenCLArray bornSum;
OpenCLArray longBornSum; OpenCLArray longBornSum;
OpenCLArray bornRadii; OpenCLArray bornRadii;
......
...@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
} }
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), hasAssignedPosqCharges(false),
integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
useDoublePrecision = false; useDoublePrecision = false;
...@@ -295,6 +295,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -295,6 +295,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["convert_mixed4"] = "convert_float4"; compilationDefines["convert_mixed4"] = "convert_float4";
} }
posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0)); posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0));
atomIndexDevice.initialize<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
...@@ -494,11 +499,6 @@ void OpenCLContext::initialize() { ...@@ -494,11 +499,6 @@ void OpenCLContext::initialize() {
((mm_float4*) pinnedMemory)[i] = mm_float4(0.0f, 0.0f, 0.0f, mass == 0.0 ? 0.0f : (cl_float) (1.0/mass)); ((mm_float4*) pinnedMemory)[i] = mm_float4(0.0f, 0.0f, 0.0f, mass == 0.0 ? 0.0f : (cl_float) (1.0/mass));
} }
velm.upload(pinnedMemory); velm.upload(pinnedMemory);
atomIndexDevice.initialize<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
nonbonded->initialize(system); nonbonded->initialize(system);
} }
...@@ -770,14 +770,14 @@ void OpenCLContext::setCharges(const vector<double>& charges) { ...@@ -770,14 +770,14 @@ void OpenCLContext::setCharges(const vector<double>& charges) {
if (!chargeBuffer.isInitialized()) if (!chargeBuffer.isInitialized())
chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer"); chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) { if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer(); vector<double> c(numAtoms);
for (int i = 0; i < charges.size(); i++) for (int i = 0; i < numAtoms; i++)
c[i] = charges[i]; c[i] = charges[i];
chargeBuffer.upload(c); chargeBuffer.upload(c);
} }
else { else {
float* c = (float*) getPinnedBuffer(); vector<float> c(numAtoms);
for (int i = 0; i < charges.size(); i++) for (int i = 0; i < numAtoms; i++)
c[i] = (float) charges[i]; c[i] = (float) charges[i];
chargeBuffer.upload(c); chargeBuffer.upload(c);
} }
...@@ -788,6 +788,12 @@ void OpenCLContext::setCharges(const vector<double>& charges) { ...@@ -788,6 +788,12 @@ void OpenCLContext::setCharges(const vector<double>& charges) {
executeKernel(setChargesKernel, numAtoms); executeKernel(setChargesKernel, numAtoms);
} }
bool OpenCLContext::requestPosqCharges() {
bool allow = !hasAssignedPosqCharges;
hasAssignedPosqCharges = true;
return allow;
}
/** /**
* This class ensures that atom reordering doesn't break virtual sites. * This class ensures that atom reordering doesn't break virtual sites.
*/ */
......
This diff is collapsed.
...@@ -9,7 +9,11 @@ ...@@ -9,7 +9,11 @@
includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection); includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r; const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR); const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real prefactor = 138.935456f*posq1.w*posq2.w*invR; #if HAS_COULOMB
const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
#else
const real prefactor = 0.0f;
#endif
#ifdef USE_DOUBLE_PRECISION #ifdef USE_DOUBLE_PRECISION
const real erfcAlphaR = erfc(alphaR); const real erfcAlphaR = erfc(alphaR);
...@@ -35,7 +39,7 @@ ...@@ -35,7 +39,7 @@
const real dar6 = dar4*dar2; const real dar6 = dar4*dar2;
const real invR2 = invR*invR; const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2); const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2; const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y; const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6; const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4; const real eprefac = 1.0f + dar2 + 0.5f*dar4;
...@@ -45,6 +49,7 @@ ...@@ -45,6 +49,7 @@
if (needCorrection) { if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) { if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = erf(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles. real erfAlphaR = erf(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
...@@ -52,8 +57,9 @@ ...@@ -52,8 +57,9 @@
} }
else { else {
includeInteraction = false; includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w; tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
} }
#endif
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
#if DO_LJPME #if DO_LJPME
// The multiplicative grid term // The multiplicative grid term
...@@ -64,11 +70,11 @@ ...@@ -64,11 +70,11 @@
} }
else { else {
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x; real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real eps = sigmaEpsilon1.y*sigmaEpsilon2.y; real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps; real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f); real ljEnergy = epssig6*(sig6 - 1.0f);
...@@ -113,11 +119,11 @@ ...@@ -113,11 +119,11 @@
#endif #endif
real tempForce = 0; real tempForce = 0;
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x; real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y); real epssig6 = sig6*(SIGMA_EPSILON1.y*SIGMA_EPSILON2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6-1); real ljEnergy = epssig6*(sig6-1);
#if USE_LJ_SWITCH #if USE_LJ_SWITCH
...@@ -134,11 +140,11 @@ ...@@ -134,11 +140,11 @@
#endif #endif
#if HAS_COULOMB #if HAS_COULOMB
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const real prefactor = 138.935456f*posq1.w*posq2.w; const real prefactor = 138.935456f*CHARGE1*CHARGE2;
tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2); tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += select((real) 0, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction); tempEnergy += select((real) 0, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction);
#else #else
const real prefactor = 138.935456f*posq1.w*posq2.w*invR; const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
tempForce += prefactor; tempForce += prefactor;
tempEnergy += select((real) 0, prefactor, includeInteraction); tempEnergy += select((real) 0, prefactor, includeInteraction);
#endif #endif
......
...@@ -19,7 +19,7 @@ __kernel void computeBornSum( ...@@ -19,7 +19,7 @@ __kernel void computeBornSum(
#else #else
__global real* restrict global_bornSum, __global real* restrict global_bornSum,
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const real* restrict charge, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...@@ -45,6 +45,7 @@ __kernel void computeBornSum( ...@@ -45,6 +45,7 @@ __kernel void computeBornSum(
real bornSum = 0.0f; real bornSum = 0.0f;
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
...@@ -52,7 +53,7 @@ __kernel void computeBornSum( ...@@ -52,7 +53,7 @@ __kernel void computeBornSum(
localData[get_local_id(0)].x = posq1.x; localData[get_local_id(0)].x = posq1.x;
localData[get_local_id(0)].y = posq1.y; localData[get_local_id(0)].y = posq1.y;
localData[get_local_id(0)].z = posq1.z; localData[get_local_id(0)].z = posq1.z;
localData[get_local_id(0)].q = posq1.w; localData[get_local_id(0)].q = charge1;
localData[get_local_id(0)].radius = params1.x; localData[get_local_id(0)].radius = params1.x;
localData[get_local_id(0)].scaledRadius = params1.y; localData[get_local_id(0)].scaledRadius = params1.y;
SYNC_WARPS; SYNC_WARPS;
...@@ -93,7 +94,7 @@ __kernel void computeBornSum( ...@@ -93,7 +94,7 @@ __kernel void computeBornSum(
localData[get_local_id(0)].x = tempPosq.x; localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y; localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z; localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w; localData[get_local_id(0)].q = charge[j];
float2 tempParams = global_params[j]; float2 tempParams = global_params[j];
localData[get_local_id(0)].radius = tempParams.x; localData[get_local_id(0)].radius = tempParams.x;
localData[get_local_id(0)].scaledRadius = tempParams.y; localData[get_local_id(0)].scaledRadius = tempParams.y;
...@@ -230,6 +231,7 @@ __kernel void computeBornSum( ...@@ -230,6 +231,7 @@ __kernel void computeBornSum(
// Load atom data for this tile. // Load atom data for this tile.
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx]; unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
...@@ -242,7 +244,7 @@ __kernel void computeBornSum( ...@@ -242,7 +244,7 @@ __kernel void computeBornSum(
localData[get_local_id(0)].x = tempPosq.x; localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y; localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z; localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w; localData[get_local_id(0)].q = charge[j];
float2 tempParams = global_params[j]; float2 tempParams = global_params[j];
localData[get_local_id(0)].radius = tempParams.x; localData[get_local_id(0)].radius = tempParams.x;
localData[get_local_id(0)].scaledRadius = tempParams.y; localData[get_local_id(0)].scaledRadius = tempParams.y;
...@@ -385,7 +387,8 @@ __kernel void computeGBSAForce1( ...@@ -385,7 +387,8 @@ __kernel void computeGBSAForce1(
#else #else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce, __global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif #endif
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, int needEnergy, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict charge,
__global const real* restrict global_bornRadii, int needEnergy,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...@@ -412,6 +415,7 @@ __kernel void computeGBSAForce1( ...@@ -412,6 +415,7 @@ __kernel void computeGBSAForce1(
real4 force = 0.0f; real4 force = 0.0f;
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1]; real bornRadius1 = global_bornRadii[atom1];
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
...@@ -420,13 +424,14 @@ __kernel void computeGBSAForce1( ...@@ -420,13 +424,14 @@ __kernel void computeGBSAForce1(
localData[localAtomIndex].x = posq1.x; localData[localAtomIndex].x = posq1.x;
localData[localAtomIndex].y = posq1.y; localData[localAtomIndex].y = posq1.y;
localData[localAtomIndex].z = posq1.z; localData[localAtomIndex].z = posq1.z;
localData[localAtomIndex].q = posq1.w; localData[localAtomIndex].q = charge1;
localData[get_local_id(0)].bornRadius = bornRadius1; localData[get_local_id(0)].bornRadius = bornRadius1;
SYNC_WARPS; SYNC_WARPS;
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q); real3 pos2 = (real3) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[tbx+j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -442,7 +447,7 @@ __kernel void computeGBSAForce1( ...@@ -442,7 +447,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -471,7 +476,7 @@ __kernel void computeGBSAForce1( ...@@ -471,7 +476,7 @@ __kernel void computeGBSAForce1(
localData[get_local_id(0)].x = tempPosq.x; localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y; localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z; localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w; localData[get_local_id(0)].q = charge[j];
localData[get_local_id(0)].bornRadius = global_bornRadii[j]; localData[get_local_id(0)].bornRadius = global_bornRadii[j];
localData[get_local_id(0)].fx = 0.0f; localData[get_local_id(0)].fx = 0.0f;
localData[get_local_id(0)].fy = 0.0f; localData[get_local_id(0)].fy = 0.0f;
...@@ -481,8 +486,9 @@ __kernel void computeGBSAForce1( ...@@ -481,8 +486,9 @@ __kernel void computeGBSAForce1(
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real3 pos2 = (real3) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[tbx+tj].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -498,7 +504,7 @@ __kernel void computeGBSAForce1( ...@@ -498,7 +504,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -617,6 +623,7 @@ __kernel void computeGBSAForce1( ...@@ -617,6 +623,7 @@ __kernel void computeGBSAForce1(
// Load atom data for this tile. // Load atom data for this tile.
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1]; real bornRadius1 = global_bornRadii[atom1];
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx]; unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
...@@ -629,7 +636,7 @@ __kernel void computeGBSAForce1( ...@@ -629,7 +636,7 @@ __kernel void computeGBSAForce1(
localData[get_local_id(0)].x = tempPosq.x; localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y; localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z; localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w; localData[get_local_id(0)].q = charge[j];
localData[get_local_id(0)].bornRadius = global_bornRadii[j]; localData[get_local_id(0)].bornRadius = global_bornRadii[j];
localData[get_local_id(0)].fx = 0.0f; localData[get_local_id(0)].fx = 0.0f;
localData[get_local_id(0)].fy = 0.0f; localData[get_local_id(0)].fy = 0.0f;
...@@ -650,8 +657,9 @@ __kernel void computeGBSAForce1( ...@@ -650,8 +657,9 @@ __kernel void computeGBSAForce1(
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real3 pos2 = (real3) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[tbx+tj].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
...@@ -662,7 +670,7 @@ __kernel void computeGBSAForce1( ...@@ -662,7 +670,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -694,8 +702,9 @@ __kernel void computeGBSAForce1( ...@@ -694,8 +702,9 @@ __kernel void computeGBSAForce1(
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real3 pos2 = (real3) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[tbx+tj].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -711,7 +720,7 @@ __kernel void computeGBSAForce1( ...@@ -711,7 +720,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
{ {
real invRSquaredOver4 = 0.25f*invR*invR; real invRSquaredOver4 = 0.25f*invR*invR;
real rScaledRadiusJ = r+obcParams2.y; real rScaledRadiusJ = r+OBC_PARAMS2.y;
real rScaledRadiusI = r+obcParams1.y; real rScaledRadiusI = r+OBC_PARAMS1.y;
real l_ijJ = RECIP(max((real) obcParams1.x, fabs(r-obcParams2.y))); real l_ijJ = RECIP(max((real) OBC_PARAMS1.x, fabs(r-OBC_PARAMS2.y)));
real l_ijI = RECIP(max((real) obcParams2.x, fabs(r-obcParams1.y))); real l_ijI = RECIP(max((real) OBC_PARAMS2.x, fabs(r-OBC_PARAMS1.y)));
real u_ijJ = RECIP(rScaledRadiusJ); real u_ijJ = RECIP(rScaledRadiusJ);
real u_ijI = RECIP(rScaledRadiusI); real u_ijI = RECIP(rScaledRadiusI);
real l_ij2J = l_ijJ*l_ijJ; real l_ij2J = l_ijJ*l_ijJ;
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
real t1I = LOG(u_ijI*RECIP(l_ijI)); real t1I = LOG(u_ijI*RECIP(l_ijI));
real t2J = (l_ij2J-u_ij2J); real t2J = (l_ij2J-u_ij2J);
real t2I = (l_ij2I-u_ij2I); real t2I = (l_ij2I-u_ij2I);
real term1 = (0.5f*(0.25f+obcParams2.y*obcParams2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR; real term1 = (0.5f*(0.25f+OBC_PARAMS2.y*OBC_PARAMS2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR;
real term2 = (0.5f*(0.25f+obcParams1.y*obcParams1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR; real term2 = (0.5f*(0.25f+OBC_PARAMS1.y*OBC_PARAMS1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR;
real tempdEdR = (obcParams1.x < rScaledRadiusJ ? bornForce1*term1 : (real) 0); real tempdEdR = (OBC_PARAMS1.x < rScaledRadiusJ ? BORN_FORCE1*term1 : (real) 0);
tempdEdR += (obcParams2.x < rScaledRadiusI ? bornForce2*term2 : (real) 0); tempdEdR += (OBC_PARAMS2.x < rScaledRadiusI ? BORN_FORCE2*term2 : (real) 0);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
bool includeInteraction = (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED); bool includeInteraction = (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED);
#else #else
......
...@@ -18,7 +18,7 @@ __kernel void computeBornSum( ...@@ -18,7 +18,7 @@ __kernel void computeBornSum(
#else #else
__global real* restrict global_bornSum, __global real* restrict global_bornSum,
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const real* restrict charge, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...@@ -46,7 +46,7 @@ __kernel void computeBornSum( ...@@ -46,7 +46,7 @@ __kernel void computeBornSum(
localData[localAtomIndex].x = tempPosq.x; localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y; localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z; localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w; localData[localAtomIndex].q = charge[j];
float2 tempParams = global_params[j]; float2 tempParams = global_params[j];
localData[localAtomIndex].radius = tempParams.x; localData[localAtomIndex].radius = tempParams.x;
localData[localAtomIndex].scaledRadius = tempParams.y; localData[localAtomIndex].scaledRadius = tempParams.y;
...@@ -60,8 +60,9 @@ __kernel void computeBornSum( ...@@ -60,8 +60,9 @@ __kernel void computeBornSum(
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -109,8 +110,9 @@ __kernel void computeBornSum( ...@@ -109,8 +110,9 @@ __kernel void computeBornSum(
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -238,7 +240,7 @@ __kernel void computeBornSum( ...@@ -238,7 +240,7 @@ __kernel void computeBornSum(
localData[localAtomIndex].x = tempPosq.x; localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y; localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z; localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w; localData[localAtomIndex].q = charge[j];
float2 tempParams = global_params[j]; float2 tempParams = global_params[j];
localData[localAtomIndex].radius = tempParams.x; localData[localAtomIndex].radius = tempParams.x;
localData[localAtomIndex].scaledRadius = tempParams.y; localData[localAtomIndex].scaledRadius = tempParams.y;
...@@ -261,8 +263,9 @@ __kernel void computeBornSum( ...@@ -261,8 +263,9 @@ __kernel void computeBornSum(
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX) APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
...@@ -316,8 +319,9 @@ __kernel void computeBornSum( ...@@ -316,8 +319,9 @@ __kernel void computeBornSum(
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -407,7 +411,8 @@ __kernel void computeGBSAForce1( ...@@ -407,7 +411,8 @@ __kernel void computeGBSAForce1(
#else #else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce, __global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif #endif
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, int needEnergy, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict charge,
__global const real* restrict global_bornRadii, int needEnergy,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...@@ -436,7 +441,7 @@ __kernel void computeGBSAForce1( ...@@ -436,7 +441,7 @@ __kernel void computeGBSAForce1(
localData[localAtomIndex].x = tempPosq.x; localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y; localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z; localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w; localData[localAtomIndex].q = charge[j];
localData[localAtomIndex].bornRadius = global_bornRadii[j]; localData[localAtomIndex].bornRadius = global_bornRadii[j];
} }
if (x == y) { if (x == y) {
...@@ -446,10 +451,12 @@ __kernel void computeGBSAForce1( ...@@ -446,10 +451,12 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0; real4 force = 0;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1]; real bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -467,7 +474,7 @@ __kernel void computeGBSAForce1( ...@@ -467,7 +474,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -510,10 +517,12 @@ __kernel void computeGBSAForce1( ...@@ -510,10 +517,12 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0; real4 force = 0;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1]; real bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -531,7 +540,7 @@ __kernel void computeGBSAForce1( ...@@ -531,7 +540,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -651,7 +660,7 @@ __kernel void computeGBSAForce1( ...@@ -651,7 +660,7 @@ __kernel void computeGBSAForce1(
localData[localAtomIndex].x = tempPosq.x; localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y; localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z; localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w; localData[localAtomIndex].q = charge[j];
localData[localAtomIndex].bornRadius = global_bornRadii[j]; localData[localAtomIndex].bornRadius = global_bornRadii[j];
localData[localAtomIndex].fx = 0.0f; localData[localAtomIndex].fx = 0.0f;
localData[localAtomIndex].fy = 0.0f; localData[localAtomIndex].fy = 0.0f;
...@@ -672,11 +681,13 @@ __kernel void computeGBSAForce1( ...@@ -672,11 +681,13 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0; real4 force = 0;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX) APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float bornRadius1 = global_bornRadii[atom1]; float bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
...@@ -688,7 +699,7 @@ __kernel void computeGBSAForce1( ...@@ -688,7 +699,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
...@@ -730,10 +741,12 @@ __kernel void computeGBSAForce1( ...@@ -730,10 +741,12 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0; real4 force = 0;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float bornRadius1 = global_bornRadii[atom1]; float bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
...@@ -752,7 +765,7 @@ __kernel void computeGBSAForce1( ...@@ -752,7 +765,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij); real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm; real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2); real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w; real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator); real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2); real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij); real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment