Commit 12737efd authored by Peter Eastman's avatar Peter Eastman
Browse files

Implemented CUDA calculation for Ewald energy

parent 78c26f71
......@@ -85,7 +85,7 @@ public:
int nonbondedMethod, customNonbondedMethod;
int cmMotionFrequency;
int stepCount, computeForceCount;
double time;
double time, ewaldSelfEnergy;
std::map<std::string, std::string> propertyValues;
};
......
......@@ -59,53 +59,26 @@ static void calcForces(ContextImpl& context, CudaPlatform::PlatformData& data) {
kReduceForces(gpu);
}
//static double calcEnergy(ContextImpl& context, System& system) {
static double calcEnergy(ContextImpl& context, CudaPlatform::PlatformData& data, System& system) {
// New section 2009-09-03: calculate energies and forces, then return reduced energies
_gpuContext* gpu = data.gpu;
if (gpu->sim.nonbondedMethod == EWALD)
{
// We don't currently have GPU kernels to calculate energy, so instead we have the reference
// platform do it. This is VERY slow.
LangevinIntegrator integrator(0.0, 1.0, 0.0);
ReferencePlatform platform;
Context refContext(system, integrator, platform);
const Stream& positions = context.getPositions();
double* posData = new double[positions.getSize()*3];
positions.saveToArray(posData);
vector<Vec3> pos(positions.getSize());
for (int i = 0; i < (int)pos.size(); i++)
pos[i] = Vec3(posData[3*i], posData[3*i+1], posData[3*i+2]);
delete[] posData;
refContext.setPositions(pos);
return refContext.getState(State::Energy).getPotentialEnergy();
}
else
{
if (data.nonbondedMethod != NO_CUTOFF && data.stepCount%100 == 0)
gpuReorderAtoms(gpu);
data.stepCount++;
kClearEnergy(gpu);
if (gpu->bIncludeGBSA) {
gpu->bRecalculateBornRadii = true;
kCalculateCDLJObcGbsaForces1(gpu);
kReduceObcGbsaBornForces(gpu);
kCalculateObcGbsaForces2(gpu);
}
else if (data.hasNonbonded)
kCalculateCDLJForces(gpu);
if (data.hasCustomNonbonded)
kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
kCalculateLocalForces(gpu);
if (gpu->bIncludeGBSA)
kReduceBornSumAndForces(gpu);
return kReduceEnergy(gpu);
if (data.nonbondedMethod != NO_CUTOFF && data.stepCount%100 == 0)
gpuReorderAtoms(gpu);
data.stepCount++;
kClearEnergy(gpu);
if (gpu->bIncludeGBSA) {
gpu->bRecalculateBornRadii = true;
kCalculateCDLJObcGbsaForces1(gpu);
kReduceObcGbsaBornForces(gpu);
kCalculateObcGbsaForces2(gpu);
}
return 0.0f;
else if (data.hasNonbonded)
kCalculateCDLJForces(gpu);
if (data.hasCustomNonbonded)
kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
kCalculateLocalForces(gpu);
if (gpu->bIncludeGBSA)
kReduceBornSumAndForces(gpu);
return kReduceEnergy(gpu)+data.ewaldSelfEnergy;
}
void CudaInitializeForcesKernel::initialize(const System& system) {
......@@ -325,7 +298,6 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
gpuSetPeriodicBoxSize(gpu, (float)boxVectors[0][0], (float)boxVectors[1][1], (float)boxVectors[2][2]);
method = PERIODIC;
}
if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
Vec3 boxVectors[3];
force.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
......@@ -350,6 +322,14 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
data.nonbondedMethod = method;
gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
// Compute the Ewald self energy.
data.ewaldSelfEnergy = 0.0;
double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
if (force.getNonbondedMethod() == NonbondedForce::Ewald)
for (int i = 0; i < numParticles; i++)
data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
}
// Initialize 1-4 nonbonded interactions.
......
......@@ -106,7 +106,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform::PlatformData::PlatformData(_gpuContext* gpu) : gpu(gpu), removeCM(false), nonbondedMethod(0), customNonbondedMethod(0), hasBonds(false), hasAngles(false),
hasPeriodicTorsions(false), hasRB(false), hasNonbonded(false), hasCustomNonbonded(false), primaryKernel(NULL), stepCount(0), computeForceCount(0), time(0.0) {
hasPeriodicTorsions(false), hasRB(false), hasNonbonded(false), hasCustomNonbonded(false), primaryKernel(NULL), stepCount(0), computeForceCount(0), time(0.0),
ewaldSelfEnergy(0.0) {
stringstream device;
device << gpu->device;
propertyValues[CudaPlatform::CudaDevice()] = device.str();
......
......@@ -321,7 +321,6 @@ struct cudaGmxSimulation {
float cellVolume; // Ewald parameter alpha (a.k.a. kappa)
float alphaEwald; // Ewald parameter alpha (a.k.a. kappa)
float factorEwald; // - 1 ( 4 * alphaEwald * alphaEwald)
float selfEnergyEwald; //
int kmaxX; // Maximum number of reciprocal vectors in the X direction
int kmaxY; // Maximum number of reciprocal vectors in the Y direction
int kmaxZ; // Maximum number of reciprocal vectors in the Z direction
......
......@@ -53,11 +53,16 @@ __device__ float2 ConjMultofFloat2(float2 a, float2 b)
__global__ void kCalculateEwaldFastCosSinSums_kernel()
{
const float epsilon = 1.0;
const float recipCoeff = cSim.epsfac*(4*PI/cSim.cellVolume/epsilon);
const unsigned int ksizex = 2*cSim.kmaxX-1;
const unsigned int ksizey = 2*cSim.kmaxY-1;
const unsigned int ksizez = 2*cSim.kmaxZ-1;
const unsigned int totalK = ksizex*ksizey*ksizez;
unsigned int index = threadIdx.x + blockIdx.x * blockDim.x;
float energy = 0.0f;
while (index < (cSim.kmaxY-1)*ksizez+cSim.kmaxZ)
index += blockDim.x * gridDim.x;
while (index < totalK)
{
// Find the wave vector (kx, ky, kz) this index corresponds to.
......@@ -87,8 +92,15 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
sum.y += apos.w*structureFactor.y;
}
cSim.pEwaldCosSinSum[index] = sum;
// Compute the contribution to the energy.
float k2 = kx*kx + ky*ky + kz*kz;
float ak = exp(k2*cSim.factorEwald) / k2;
energy += recipCoeff*ak*(sum.x*sum.x + sum.y*sum.y);
index += blockDim.x * gridDim.x;
}
cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
......@@ -98,8 +110,6 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
__global__ void kCalculateEwaldFastForces_kernel()
{
float PI = 3.14159265358979323846f;
const float epsilon = 1.0;
float recipCoeff = cSim.epsfac*(4*PI/cSim.cellVolume/epsilon);
......
......@@ -118,7 +118,6 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
#include "kCalculateCDLJForces.h"
// Reciprocal Space Ewald summation is in a separate kernel
#include "kCalculateCDLJEwaldReciprocal.h"
#include "kCalculateCDLJEwaldFastReciprocal.h"
......@@ -202,20 +201,10 @@ void kCalculateCDLJForces(gpuContext gpu)
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
if (false)
{
// O(N2) Ewald summation
kCalculateCDLJEwaldReciprocalForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateCDLJEwaldReciprocalForces");
}
else
{
// Fast Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
}
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
}
}
......@@ -117,7 +117,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -178,19 +178,8 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
//CDLJ_energy += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
// direct space sum
// CDLJ_energy += 0.5f * apos.w * psA[j].q * erfc(alphaR) * invR;
// reciprocal space sum
CDLJ_energy = 0.5f * apos.w * psA[j].q / PI;
CDLJ_energy *= exp ( - alphaR * alphaR) / SQRT_PI;
// self Ewald Energy
// factor 2 necessary here, because energy is divided by 2 below
// CDLJ_energy += -cSim.epsfac * (psA[i].q * psA[i].q) * cSim.alphaEwald / SQRT_PI;
// if (r != 0) energy = cSim.alphaEwald;
// if (r != 0) energy = apos.w;
#else
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJ_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
......@@ -299,7 +288,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -365,7 +354,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -468,7 +457,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......
......@@ -44,6 +44,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
#endif
#ifdef USE_EWALD
const float SQRT_PI = sqrt(PI);
#endif
unsigned int lasty = -0xFFFFFFFF;
while (pos < end)
{
......@@ -103,9 +107,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
......@@ -176,9 +188,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
......@@ -310,9 +330,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
float factorX = apos.w * psA[tj].q * invR;
dEdR += factorX;
......@@ -390,9 +418,16 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
......@@ -515,9 +550,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
float factorX = apos.w * psA[tj].q * invR;
dEdR += factorX;
......
......@@ -384,7 +384,7 @@ void testEwald() {
ASSERT_EQUAL_VEC(Vec3(-123.711, 64.1877, -302.716), forces[0], 10*TOL);
ASSERT_EQUAL_VEC(Vec3( 123.711, -64.1877, 302.716), forces[1], 10*TOL);
// ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 10*TOL);
ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 0.01/*10*TOL*/);
}
......
......@@ -267,7 +267,7 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
// PME
if (pme) {
pme_t pmedata; /* abstract handle for PME data */
int ngrid[3];
......@@ -451,7 +451,7 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
if( totalEnergy )
*totalEnergy += realSpaceEwaldEnergy + vdwEnergy;
if( energyByAtom ){
energyByAtom[ii] += realSpaceEwaldEnergy + vdwEnergy;
energyByAtom[jj] += realSpaceEwaldEnergy + vdwEnergy;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment