Unverified Commit 73cac8e6 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #2085 from peastman/charges

Support multiple NonbondedForces in one System 
parents a885a6ae daa9de8a
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -268,7 +268,7 @@ public:
private:
class PmeIO;
CpuPlatform::PlatformData& data;
int numParticles, num14;
int numParticles, num14, posqIndex;
int **bonded14IndexArray;
double **bonded14ParamArray;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
......@@ -277,6 +277,7 @@ private:
std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams;
std::vector<float> C6params;
std::vector<float> charges;
NonbondedMethod nonbondedMethod;
CpuNonbondedForce* nonbonded;
Kernel optimizedPme, optimizedDispersionPme;
......@@ -363,7 +364,9 @@ public:
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
CpuPlatform::PlatformData& data;
int posqIndex;
std::vector<std::pair<float, float> > particleParams;
std::vector<float> charges;
CpuGBSAOBCForce obc;
};
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -92,6 +92,7 @@ public:
PlatformData(int numParticles, int numThreads, bool deterministicForces);
~PlatformData();
void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList);
int requestPosqIndex();
AlignedArray<float> posq;
std::vector<AlignedArray<float> > threadForce;
ThreadPool threads;
......@@ -101,6 +102,7 @@ public:
CpuNeighborList* neighborList;
double cutoff, paddedCutoff;
bool anyExclusions, deterministicForces;
int currentPosqIndex, nextPosqIndex;
std::vector<std::set<int> > exclusions;
};
......
......@@ -138,6 +138,19 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy;
}
/**
* Copy particle charges into the fourth element of the posq array.
*/
static void copyChargesToPosq(ContextImpl& context, const vector<float>& charges, int index) {
CpuPlatform::PlatformData& data = CpuPlatform::getPlatformData(context);
if (index == data.currentPosqIndex)
return;
data.currentPosqIndex = index;
AlignedArray<float>& posq = data.posq;
for (int i = 0; i < charges.size(); i++)
posq[4*i+3] = charges[i];
}
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
......@@ -530,6 +543,7 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
}
void CpuCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
posqIndex = data.requestPosqIndex();
// Identify which exceptions are 1-4 interactions.
......@@ -556,12 +570,13 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles);
charges.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge;
charges[i] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge;
......@@ -660,6 +675,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
copyChargesToPosq(context, charges, posqIndex);
AlignedArray<float>& posq = data.posq;
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& forceData = extractForces(context);
......@@ -726,11 +742,12 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the values.
posqIndex = data.requestPosqIndex();
double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge;
charges[i] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
sumSquaredCharges += charge*charge;
}
......@@ -964,12 +981,14 @@ CpuCalcGBSAOBCForceKernel::~CpuCalcGBSAOBCForceKernel() {
}
void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
posqIndex = data.requestPosqIndex();
int numParticles = system.getNumParticles();
particleParams.resize(numParticles);
charges.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
data.posq[4*i+3] = (float) charge;
charges[i] = (float) charge;
radius -= 0.009;
particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius));
}
......@@ -983,6 +1002,7 @@ void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCFo
}
double CpuCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
copyChargesToPosq(context, charges, posqIndex);
if (data.isPeriodic) {
Vec3& boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
......@@ -1000,10 +1020,11 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co
// Record the values.
posqIndex = data.requestPosqIndex();
for (int i = 0; i < numParticles; ++i) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
data.posq[4*i+3] = (float) charge;
charges[i] = (float) charge;
radius -= 0.009;
particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius));
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -147,7 +147,7 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
}
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads),
deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false) {
deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false), currentPosqIndex(-1), nextPosqIndex(0) {
numThreads = threads.getNumThreads();
threadForce.resize(numThreads);
for (int i = 0; i < numThreads; i++)
......@@ -184,3 +184,7 @@ void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, doubl
else if (!anyExclusions)
exclusions = exclusionList;
}
int CpuPlatform::PlatformData::requestPosqIndex() {
return nextPosqIndex++;
}
\ No newline at end of file
......@@ -503,6 +503,11 @@ public:
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/**
* Request to use the fourth element of the posq array for storing charges. Since only one force can
* do that, this returns true the first time it is called, and false on all subsequent calls.
*/
bool requestPosqCharges();
/**
* Get the thread used by this context for executing parallel computations.
*/
......@@ -626,7 +631,7 @@ private:
int paddedNumAtoms;
int numAtomBlocks;
int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, forcesValid;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, forcesValid, hasAssignedPosqCharges;
bool isLinkedContext;
std::string compiler, tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
......
......@@ -675,6 +675,7 @@ private:
CudaContext& cu;
ForceInfo* info;
bool hasInitializedFFT;
CudaArray charges;
CudaArray sigmaEpsilon;
CudaArray exceptionParams;
CudaArray cosSinSums;
......@@ -719,7 +720,7 @@ private:
int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME, usePosqCharges;
NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5;
};
......@@ -815,6 +816,7 @@ private:
int maxTiles;
CudaContext& cu;
ForceInfo* info;
CudaArray charges;
CudaArray params;
CudaArray bornSum;
CudaArray bornRadii;
......
......@@ -107,8 +107,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData, CudaContext* originalContext) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false),
pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasAssignedPosqCharges(false),
hasCompilerKernel(false), isNvccAvailable(false), pinnedBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
// Determine what compiler to use.
this->compiler = "\""+compiler+"\"";
......@@ -301,6 +301,11 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["make_mixed4"] = "make_float4";
}
posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0));
atomIndexDevice.initialize<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
// Create utility kernels that are used in multiple places.
......@@ -475,11 +480,6 @@ void CudaContext::initialize() {
energyParamDerivBuffer.initialize<float>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
addAutoclearBuffer(energyParamDerivBuffer);
}
atomIndexDevice.initialize<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
findMoleculeGroups();
nonbonded->initialize(system);
}
......@@ -887,14 +887,14 @@ void CudaContext::setCharges(const vector<double>& charges) {
if (!chargeBuffer.isInitialized())
chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
vector<double> c(numAtoms);
for (int i = 0; i < numAtoms; i++)
c[i] = charges[i];
chargeBuffer.upload(c);
}
else {
float* c = (float*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
vector<float> c(numAtoms);
for (int i = 0; i < numAtoms; i++)
c[i] = (float) charges[i];
chargeBuffer.upload(c);
}
......@@ -902,6 +902,12 @@ void CudaContext::setCharges(const vector<double>& charges) {
executeKernel(setChargesKernel, args, numAtoms);
}
bool CudaContext::requestPosqCharges() {
bool allow = !hasAssignedPosqCharges;
hasAssignedPosqCharges = true;
return allow;
}
/**
* This class ensures that atom reordering doesn't break virtual sites.
*/
......
This diff is collapsed.
......@@ -4,7 +4,11 @@
unsigned int includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
#if HAS_COULOMB
const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
#else
const real prefactor = 0.0f;
#endif
#ifdef USE_DOUBLE_PRECISION
const real erfcAlphaR = erfc(alphaR);
......@@ -30,7 +34,7 @@
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2;
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
......@@ -40,6 +44,7 @@
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = ERF(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
......@@ -47,8 +52,9 @@
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
......@@ -59,11 +65,11 @@
}
else {
#if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
......@@ -108,11 +114,11 @@
#endif
real tempForce = 0.0f;
#if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
real epssig6 = sig6*(SIGMA_EPSILON1.y*SIGMA_EPSILON2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = includeInteraction ? epssig6*(sig6 - 1) : 0;
#if USE_LJ_SWITCH
......@@ -128,11 +134,11 @@
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const real prefactor = 138.935456f*posq1.w*posq2.w;
const real prefactor = 138.935456f*CHARGE1*CHARGE2;
tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += includeInteraction ? prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C) : 0;
#else
const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
tempForce += prefactor;
tempEnergy += includeInteraction ? prefactor : 0;
#endif
......
......@@ -66,7 +66,7 @@ typedef struct {
/**
* Compute the Born sum.
*/
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params,
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const real* __restrict__ charge, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
......@@ -92,6 +92,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
real bornSum = 0;
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1];
if (x == y) {
// This tile is on the diagonal.
......@@ -99,7 +100,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
localData[threadIdx.x].x = posq1.x;
localData[threadIdx.x].y = posq1.y;
localData[threadIdx.x].z = posq1.z;
localData[threadIdx.x].q = posq1.w;
localData[threadIdx.x].q = charge1;
localData[threadIdx.x].radius = params1.x;
localData[threadIdx.x].scaledRadius = params1.y;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
......@@ -138,7 +139,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w;
localData[threadIdx.x].q = charge[j];
float2 tempParams = global_params[j];
localData[threadIdx.x].radius = tempParams.x;
localData[threadIdx.x].scaledRadius = tempParams.y;
......@@ -262,6 +263,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
// Load atom data for this tile.
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1];
#ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
......@@ -274,7 +276,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w;
localData[threadIdx.x].q = charge[j];
float2 tempParams = global_params[j];
localData[threadIdx.x].radius = tempParams.x;
localData[threadIdx.x].scaledRadius = tempParams.y;
......@@ -400,7 +402,7 @@ typedef struct {
*/
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii, bool needEnergy,
mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ charge, const real* __restrict__ global_bornRadii, bool needEnergy,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
......@@ -427,6 +429,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 force = make_real4(0);
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1];
if (x == y) {
// This tile is on the diagonal.
......@@ -434,12 +437,13 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
localData[threadIdx.x].x = posq1.x;
localData[threadIdx.x].y = posq1.y;
localData[threadIdx.x].z = posq1.z;
localData[threadIdx.x].q = posq1.w;
localData[threadIdx.x].q = charge1;
localData[threadIdx.x].bornRadius = bornRadius1;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
real3 pos2 = make_real3(localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z);
real charge2 = localData[tbx+j].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -455,7 +459,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -485,7 +489,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w;
localData[threadIdx.x].q = charge[j];
localData[threadIdx.x].bornRadius = global_bornRadii[j];
localData[threadIdx.x].fx = 0.0f;
localData[threadIdx.x].fy = 0.0f;
......@@ -494,8 +498,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
real3 pos2 = make_real3(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real charge2 = localData[tbx+tj].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -511,7 +516,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -617,6 +622,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
// Load atom data for this tile.
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1];
#ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
......@@ -629,7 +635,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
localData[threadIdx.x].x = tempPosq.x;
localData[threadIdx.x].y = tempPosq.y;
localData[threadIdx.x].z = tempPosq.z;
localData[threadIdx.x].q = tempPosq.w;
localData[threadIdx.x].q = charge[j];
localData[threadIdx.x].bornRadius = global_bornRadii[j];
localData[threadIdx.x].fx = 0.0f;
localData[threadIdx.x].fy = 0.0f;
......@@ -648,8 +654,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
real3 pos2 = make_real3(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real charge2 = localData[tbx+tj].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
......@@ -660,7 +667,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -693,8 +700,9 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
real3 pos2 = make_real3(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real charge2 = localData[tbx+tj].q;
real3 delta = make_real3(pos2.x-posq1.x, pos2.y-posq1.y, pos2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -710,7 +718,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
{
real invRSquaredOver4 = 0.25f*invR*invR;
real rScaledRadiusJ = r+obcParams2.y;
real rScaledRadiusI = r+obcParams1.y;
real l_ijJ = RECIP(max(obcParams1.x, fabs(r-obcParams2.y)));
real l_ijI = RECIP(max(obcParams2.x, fabs(r-obcParams1.y)));
real rScaledRadiusJ = r+OBC_PARAMS2.y;
real rScaledRadiusI = r+OBC_PARAMS1.y;
real l_ijJ = RECIP(max(OBC_PARAMS1.x, fabs(r-OBC_PARAMS2.y)));
real l_ijI = RECIP(max(OBC_PARAMS2.x, fabs(r-OBC_PARAMS1.y)));
real u_ijJ = RECIP(rScaledRadiusJ);
real u_ijI = RECIP(rScaledRadiusI);
real l_ij2J = l_ijJ*l_ijJ;
......@@ -14,10 +14,10 @@
real t1I = LOG(u_ijI*RECIP(l_ijI));
real t2J = (l_ij2J-u_ij2J);
real t2I = (l_ij2I-u_ij2I);
real term1 = (0.5f*(0.25f+obcParams2.y*obcParams2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR;
real term2 = (0.5f*(0.25f+obcParams1.y*obcParams1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR;
real tempdEdR = (obcParams1.x < rScaledRadiusJ ? bornForce1*term1/0x100000000 : 0);
tempdEdR += (obcParams2.x < rScaledRadiusI ? bornForce2*term2/0x100000000 : 0);
real term1 = (0.5f*(0.25f+OBC_PARAMS2.y*OBC_PARAMS2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR;
real term2 = (0.5f*(0.25f+OBC_PARAMS1.y*OBC_PARAMS1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR;
real tempdEdR = (OBC_PARAMS1.x < rScaledRadiusJ ? BORN_FORCE1*term1/0x100000000 : 0);
tempdEdR += (OBC_PARAMS2.x < rScaledRadiusI ? BORN_FORCE2*term2/0x100000000 : 0);
#ifdef USE_CUTOFF
unsigned int includeInteraction = (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED);
#else
......
......@@ -3,8 +3,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Compute the index of the grid point each atom is associated with.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real4 pos = posq[i];
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real4 pos = posq[atom];
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
......@@ -15,7 +15,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
int3 gridIndex = make_int3(((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z);
pmeAtomGridIndex[i] = make_int2(i, gridIndex.x*GRID_SIZE_Y*GRID_SIZE_Z+gridIndex.y*GRID_SIZE_Z+gridIndex.z);
pmeAtomGridIndex[atom] = make_int2(atom, gridIndex.x*GRID_SIZE_Y*GRID_SIZE_Z+gridIndex.y*GRID_SIZE_Z+gridIndex.z);
}
}
......@@ -24,6 +24,8 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#else
, const real* __restrict__ charges
#endif
) {
real3 data[PME_ORDER];
......@@ -39,7 +41,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
const real charge = CHARGE;
#endif
if (charge == 0)
continue;
......@@ -253,6 +255,8 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#else
, const real* __restrict__ charges
#endif
) {
real3 data[PME_ORDER];
......@@ -330,7 +334,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
const float2 sigEps = sigmaEpsilon[atom];
real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
real q = pos.w*EPSILON_FACTOR;
real q = CHARGE*EPSILON_FACTOR;
#endif
real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
......
......@@ -618,6 +618,11 @@ public:
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/**
* Request to use the fourth element of the posq array for storing charges. Since only one force can
* do that, this returns true the first time it is called, and false on all subsequent calls.
*/
bool requestPosqCharges();
/**
* Get the thread used by this context for executing parallel computations.
*/
......@@ -738,7 +743,7 @@ private:
int numThreadBlocks;
int numForceBuffers;
int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic, forcesValid;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic, forcesValid, hasAssignedPosqCharges;
mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble;
std::string defaultOptimizationOptions;
......
......@@ -652,6 +652,7 @@ private:
OpenCLContext& cl;
ForceInfo* info;
bool hasInitializedKernel;
OpenCLArray charges;
OpenCLArray sigmaEpsilon;
OpenCLArray exceptionParams;
OpenCLArray cosSinSums;
......@@ -698,7 +699,7 @@ private:
double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int gridSizeX, gridSizeY, gridSizeZ;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue, doLJPME;
bool hasCoulomb, hasLJ, usePmeQueue, doLJPME, usePosqCharges;
NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5;
};
......@@ -795,6 +796,7 @@ private:
OpenCLContext& cl;
ForceInfo* info;
OpenCLArray params;
OpenCLArray charges;
OpenCLArray bornSum;
OpenCLArray longBornSum;
OpenCLArray bornRadii;
......
......@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
}
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false),
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), hasAssignedPosqCharges(false),
integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") {
useDoublePrecision = false;
......@@ -295,6 +295,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["convert_mixed4"] = "convert_float4";
}
posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0));
atomIndexDevice.initialize<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
}
catch (cl::Error err) {
std::stringstream str;
......@@ -494,11 +499,6 @@ void OpenCLContext::initialize() {
((mm_float4*) pinnedMemory)[i] = mm_float4(0.0f, 0.0f, 0.0f, mass == 0.0 ? 0.0f : (cl_float) (1.0/mass));
}
velm.upload(pinnedMemory);
atomIndexDevice.initialize<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
atomIndex[i] = i;
atomIndexDevice.upload(atomIndex);
findMoleculeGroups();
nonbonded->initialize(system);
}
......@@ -770,14 +770,14 @@ void OpenCLContext::setCharges(const vector<double>& charges) {
if (!chargeBuffer.isInitialized())
chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
vector<double> c(numAtoms);
for (int i = 0; i < numAtoms; i++)
c[i] = charges[i];
chargeBuffer.upload(c);
}
else {
float* c = (float*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
vector<float> c(numAtoms);
for (int i = 0; i < numAtoms; i++)
c[i] = (float) charges[i];
chargeBuffer.upload(c);
}
......@@ -788,6 +788,12 @@ void OpenCLContext::setCharges(const vector<double>& charges) {
executeKernel(setChargesKernel, numAtoms);
}
bool OpenCLContext::requestPosqCharges() {
bool allow = !hasAssignedPosqCharges;
hasAssignedPosqCharges = true;
return allow;
}
/**
* This class ensures that atom reordering doesn't break virtual sites.
*/
......
This diff is collapsed.
......@@ -9,7 +9,11 @@
includeInteraction = ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
#if HAS_COULOMB
const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
#else
const real prefactor = 0.0f;
#endif
#ifdef USE_DOUBLE_PRECISION
const real erfcAlphaR = erfc(alphaR);
......@@ -35,7 +39,7 @@
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2;
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
......@@ -45,6 +49,7 @@
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
#if HAS_COULOMB
if (1-erfcAlphaR > 1e-6) {
real erfAlphaR = erf(alphaR); // Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
......@@ -52,8 +57,9 @@
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*CHARGE1*CHARGE2;
}
#endif
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
......@@ -64,11 +70,11 @@
}
else {
#if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
......@@ -113,11 +119,11 @@
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
real epssig6 = sig6*(SIGMA_EPSILON1.y*SIGMA_EPSILON2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6-1);
#if USE_LJ_SWITCH
......@@ -134,11 +140,11 @@
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const real prefactor = 138.935456f*posq1.w*posq2.w;
const real prefactor = 138.935456f*CHARGE1*CHARGE2;
tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += select((real) 0, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction);
#else
const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
const real prefactor = 138.935456f*CHARGE1*CHARGE2*invR;
tempForce += prefactor;
tempEnergy += select((real) 0, prefactor, includeInteraction);
#endif
......
......@@ -19,7 +19,7 @@ __kernel void computeBornSum(
#else
__global real* restrict global_bornSum,
#endif
__global const real4* restrict posq, __global const float2* restrict global_params,
__global const real4* restrict posq, __global const real* restrict charge, __global const float2* restrict global_params,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......@@ -45,6 +45,7 @@ __kernel void computeBornSum(
real bornSum = 0.0f;
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1];
if (x == y) {
// This tile is on the diagonal.
......@@ -52,7 +53,7 @@ __kernel void computeBornSum(
localData[get_local_id(0)].x = posq1.x;
localData[get_local_id(0)].y = posq1.y;
localData[get_local_id(0)].z = posq1.z;
localData[get_local_id(0)].q = posq1.w;
localData[get_local_id(0)].q = charge1;
localData[get_local_id(0)].radius = params1.x;
localData[get_local_id(0)].scaledRadius = params1.y;
SYNC_WARPS;
......@@ -93,7 +94,7 @@ __kernel void computeBornSum(
localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w;
localData[get_local_id(0)].q = charge[j];
float2 tempParams = global_params[j];
localData[get_local_id(0)].radius = tempParams.x;
localData[get_local_id(0)].scaledRadius = tempParams.y;
......@@ -230,6 +231,7 @@ __kernel void computeBornSum(
// Load atom data for this tile.
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float2 params1 = global_params[atom1];
#ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
......@@ -242,7 +244,7 @@ __kernel void computeBornSum(
localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w;
localData[get_local_id(0)].q = charge[j];
float2 tempParams = global_params[j];
localData[get_local_id(0)].radius = tempParams.x;
localData[get_local_id(0)].scaledRadius = tempParams.y;
......@@ -385,7 +387,8 @@ __kernel void computeGBSAForce1(
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, int needEnergy,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict charge,
__global const real* restrict global_bornRadii, int needEnergy,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......@@ -412,6 +415,7 @@ __kernel void computeGBSAForce1(
real4 force = 0.0f;
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1];
if (x == y) {
// This tile is on the diagonal.
......@@ -420,13 +424,14 @@ __kernel void computeGBSAForce1(
localData[localAtomIndex].x = posq1.x;
localData[localAtomIndex].y = posq1.y;
localData[localAtomIndex].z = posq1.z;
localData[localAtomIndex].q = posq1.w;
localData[localAtomIndex].q = charge1;
localData[get_local_id(0)].bornRadius = bornRadius1;
SYNC_WARPS;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z);
real charge2 = localData[tbx+j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -442,7 +447,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -471,7 +476,7 @@ __kernel void computeGBSAForce1(
localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w;
localData[get_local_id(0)].q = charge[j];
localData[get_local_id(0)].bornRadius = global_bornRadii[j];
localData[get_local_id(0)].fx = 0.0f;
localData[get_local_id(0)].fy = 0.0f;
......@@ -481,8 +486,9 @@ __kernel void computeGBSAForce1(
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real charge2 = localData[tbx+tj].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -498,7 +504,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -617,6 +623,7 @@ __kernel void computeGBSAForce1(
// Load atom data for this tile.
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1];
#ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
......@@ -629,7 +636,7 @@ __kernel void computeGBSAForce1(
localData[get_local_id(0)].x = tempPosq.x;
localData[get_local_id(0)].y = tempPosq.y;
localData[get_local_id(0)].z = tempPosq.z;
localData[get_local_id(0)].q = tempPosq.w;
localData[get_local_id(0)].q = charge[j];
localData[get_local_id(0)].bornRadius = global_bornRadii[j];
localData[get_local_id(0)].fx = 0.0f;
localData[get_local_id(0)].fy = 0.0f;
......@@ -650,8 +657,9 @@ __kernel void computeGBSAForce1(
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real charge2 = localData[tbx+tj].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
......@@ -662,7 +670,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -694,8 +702,9 @@ __kernel void computeGBSAForce1(
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z);
real charge2 = localData[tbx+tj].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -711,7 +720,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
{
real invRSquaredOver4 = 0.25f*invR*invR;
real rScaledRadiusJ = r+obcParams2.y;
real rScaledRadiusI = r+obcParams1.y;
real l_ijJ = RECIP(max((real) obcParams1.x, fabs(r-obcParams2.y)));
real l_ijI = RECIP(max((real) obcParams2.x, fabs(r-obcParams1.y)));
real rScaledRadiusJ = r+OBC_PARAMS2.y;
real rScaledRadiusI = r+OBC_PARAMS1.y;
real l_ijJ = RECIP(max((real) OBC_PARAMS1.x, fabs(r-OBC_PARAMS2.y)));
real l_ijI = RECIP(max((real) OBC_PARAMS2.x, fabs(r-OBC_PARAMS1.y)));
real u_ijJ = RECIP(rScaledRadiusJ);
real u_ijI = RECIP(rScaledRadiusI);
real l_ij2J = l_ijJ*l_ijJ;
......@@ -14,10 +14,10 @@
real t1I = LOG(u_ijI*RECIP(l_ijI));
real t2J = (l_ij2J-u_ij2J);
real t2I = (l_ij2I-u_ij2I);
real term1 = (0.5f*(0.25f+obcParams2.y*obcParams2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR;
real term2 = (0.5f*(0.25f+obcParams1.y*obcParams1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR;
real tempdEdR = (obcParams1.x < rScaledRadiusJ ? bornForce1*term1 : (real) 0);
tempdEdR += (obcParams2.x < rScaledRadiusI ? bornForce2*term2 : (real) 0);
real term1 = (0.5f*(0.25f+OBC_PARAMS2.y*OBC_PARAMS2.y*invRSquaredOver4)*t2J + t1J*invRSquaredOver4)*invR;
real term2 = (0.5f*(0.25f+OBC_PARAMS1.y*OBC_PARAMS1.y*invRSquaredOver4)*t2I + t1I*invRSquaredOver4)*invR;
real tempdEdR = (OBC_PARAMS1.x < rScaledRadiusJ ? BORN_FORCE1*term1 : (real) 0);
tempdEdR += (OBC_PARAMS2.x < rScaledRadiusI ? BORN_FORCE2*term2 : (real) 0);
#ifdef USE_CUTOFF
bool includeInteraction = (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2 && r2 < CUTOFF_SQUARED);
#else
......
......@@ -18,7 +18,7 @@ __kernel void computeBornSum(
#else
__global real* restrict global_bornSum,
#endif
__global const real4* restrict posq, __global const float2* restrict global_params,
__global const real4* restrict posq, __global const real* restrict charge, __global const float2* restrict global_params,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......@@ -46,7 +46,7 @@ __kernel void computeBornSum(
localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w;
localData[localAtomIndex].q = charge[j];
float2 tempParams = global_params[j];
localData[localAtomIndex].radius = tempParams.x;
localData[localAtomIndex].scaledRadius = tempParams.y;
......@@ -60,8 +60,9 @@ __kernel void computeBornSum(
real4 posq1 = posq[atom1];
float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -109,8 +110,9 @@ __kernel void computeBornSum(
real4 posq1 = posq[atom1];
float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -238,7 +240,7 @@ __kernel void computeBornSum(
localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w;
localData[localAtomIndex].q = charge[j];
float2 tempParams = global_params[j];
localData[localAtomIndex].radius = tempParams.x;
localData[localAtomIndex].scaledRadius = tempParams.y;
......@@ -261,8 +263,9 @@ __kernel void computeBornSum(
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
......@@ -316,8 +319,9 @@ __kernel void computeBornSum(
real4 posq1 = posq[atom1];
float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -407,7 +411,8 @@ __kernel void computeGBSAForce1(
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, int needEnergy,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict charge,
__global const real* restrict global_bornRadii, int needEnergy,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......@@ -436,7 +441,7 @@ __kernel void computeGBSAForce1(
localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w;
localData[localAtomIndex].q = charge[j];
localData[localAtomIndex].bornRadius = global_bornRadii[j];
}
if (x == y) {
......@@ -446,10 +451,12 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -467,7 +474,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -510,10 +517,12 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
real bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -531,7 +540,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -651,7 +660,7 @@ __kernel void computeGBSAForce1(
localData[localAtomIndex].x = tempPosq.x;
localData[localAtomIndex].y = tempPosq.y;
localData[localAtomIndex].z = tempPosq.z;
localData[localAtomIndex].q = tempPosq.w;
localData[localAtomIndex].q = charge[j];
localData[localAtomIndex].bornRadius = global_bornRadii[j];
localData[localAtomIndex].fx = 0.0f;
localData[localAtomIndex].fy = 0.0f;
......@@ -672,11 +681,13 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
......@@ -688,7 +699,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......@@ -730,10 +741,12 @@ __kernel void computeGBSAForce1(
unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0;
real4 posq1 = posq[atom1];
real charge1 = charge[atom1];
float bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real3 pos2 = (real3) (localData[j].x, localData[j].y, localData[j].z);
real charge2 = localData[j].q;
real4 delta = (real4) (pos2 - posq1.xyz, 0);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
......@@ -752,7 +765,7 @@ __kernel void computeGBSAForce1(
real expTerm = EXP(-D_ij);
real denominator2 = r2 + alpha2_ij*expTerm;
real denominator = SQRT(denominator2);
real scaledChargeProduct = PREFACTOR*posq1.w*posq2.w;
real scaledChargeProduct = PREFACTOR*charge1*charge2;
real tempEnergy = scaledChargeProduct*RECIP(denominator);
real Gpol = tempEnergy*RECIP(denominator2);
real dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment