Commit fdc7cc07 authored by Kai Kohlhoff's avatar Kai Kohlhoff
Browse files

All energies existing in Preview Release 3 now run on the GPU, new Ewald...

All energies existing in Preview Release 3 now run on the GPU, new Ewald energy terms still use Reference implementation
parent 559cf7c6
......@@ -41,6 +41,7 @@ using namespace std;
static void calcForces(ContextImpl& context, CudaPlatform::PlatformData& data) {
_gpuContext* gpu = data.gpu;
gpu->bReduceEnergies = false;
if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
gpuReorderAtoms(gpu);
data.computeForceCount++;
......@@ -59,22 +60,67 @@ static void calcForces(ContextImpl& context, CudaPlatform::PlatformData& data) {
kReduceForces(gpu);
}
static double calcEnergy(ContextImpl& context, System& system) {
// We don't currently have GPU kernels to calculate energy, so instead we have the reference
// platform do it. This is VERY slow.
//static double calcEnergy(ContextImpl& context, System& system) {
static double calcEnergy(ContextImpl& context, CudaPlatform::PlatformData& data, System& system) {
LangevinIntegrator integrator(0.0, 1.0, 0.0);
ReferencePlatform platform;
Context refContext(system, integrator, platform);
const Stream& positions = context.getPositions();
double* posData = new double[positions.getSize()*3];
positions.saveToArray(posData);
vector<Vec3> pos(positions.getSize());
for (int i = 0; i < (int)pos.size(); i++)
pos[i] = Vec3(posData[3*i], posData[3*i+1], posData[3*i+2]);
delete[] posData;
refContext.setPositions(pos);
return refContext.getState(State::Energy).getPotentialEnergy();
// New section 2009-09-03: calculate energies and forces, then return reduced energies
_gpuContext* gpu = data.gpu;
if (gpu->sim.nonbondedMethod == EWALD)
{
// We don't currently have GPU kernels to calculate energy, so instead we have the reference
// platform do it. This is VERY slow.
LangevinIntegrator integrator(0.0, 1.0, 0.0);
ReferencePlatform platform;
Context refContext(system, integrator, platform);
const Stream& positions = context.getPositions();
double* posData = new double[positions.getSize()*3];
positions.saveToArray(posData);
vector<Vec3> pos(positions.getSize());
for (int i = 0; i < (int)pos.size(); i++)
pos[i] = Vec3(posData[3*i], posData[3*i+1], posData[3*i+2]);
delete[] posData;
refContext.setPositions(pos);
// printf("Total CPU energies: %f\n", refContext.getState(State::Energy).getPotentialEnergy());
return refContext.getState(State::Energy).getPotentialEnergy();
}
else
{
double totalEnergy = 0.0f;
double energy;
gpu->bReduceEnergies = true;
if (data.nonbondedMethod != NO_CUTOFF && data.stepCount%100 == 0)
gpuReorderAtoms(gpu);
data.stepCount++;
kClearForces(gpu);
if (gpu->bIncludeGBSA) {
gpu->bRecalculateBornRadii = true;
energy = kCalculateCDLJObcGbsaForces1(gpu);
// printf("Calculated CDLJObcGbsa energy: %f\n", energy);
totalEnergy += energy;
energy = kReduceObcGbsaBornForces(gpu);
// printf("Calculated reduced GbsaBorn surface area energy: %f\n", energy);
totalEnergy += energy;
kCalculateObcGbsaForces2(gpu);
}
else if (data.hasNonbonded) {
energy = kCalculateCDLJForces(gpu);
// printf("Calculated CDLJ energy: %f\n", energy);
totalEnergy += energy;
}
energy = kCalculateLocalForces(gpu);
// printf("Calculated local interactions energy: %f\n", energy);
totalEnergy += energy;
if (gpu->bIncludeGBSA)
kReduceBornSumAndForces(gpu);
else
kReduceForces(gpu);
// printf("Total GPU energies: %f\n", totalEnergy);
return totalEnergy;
}
return 0.0f;
}
void CudaInitializeForcesKernel::initialize(const System& system) {
......@@ -122,7 +168,7 @@ void CudaCalcHarmonicBondForceKernel::executeForces(ContextImpl& context) {
double CudaCalcHarmonicBondForceKernel::executeEnergy(ContextImpl& context) {
if (data.primaryKernel == this)
return calcEnergy(context, system);
return calcEnergy(context, data, system);
return 0.0;
}
......@@ -156,7 +202,7 @@ void CudaCalcHarmonicAngleForceKernel::executeForces(ContextImpl& context) {
double CudaCalcHarmonicAngleForceKernel::executeEnergy(ContextImpl& context) {
if (data.primaryKernel == this)
return calcEnergy(context, system);
return calcEnergy(context, data, system);
return 0.0;
}
......@@ -192,7 +238,7 @@ void CudaCalcPeriodicTorsionForceKernel::executeForces(ContextImpl& context) {
double CudaCalcPeriodicTorsionForceKernel::executeEnergy(ContextImpl& context) {
if (data.primaryKernel == this)
return calcEnergy(context, system);
return calcEnergy(context, data, system);
return 0.0;
}
......@@ -234,7 +280,7 @@ void CudaCalcRBTorsionForceKernel::executeForces(ContextImpl& context) {
double CudaCalcRBTorsionForceKernel::executeEnergy(ContextImpl& context) {
if (data.primaryKernel == this)
return calcEnergy(context, system);
return calcEnergy(context, data, system);
return 0.0;
}
......@@ -350,7 +396,7 @@ void CudaCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
double CudaCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
if (data.primaryKernel == this)
return calcEnergy(context, system);
return calcEnergy(context, data, system);
return 0.0;
}
......@@ -432,7 +478,7 @@ void CudaCalcCustomNonbondedForceKernel::executeForces(ContextImpl& context) {
double CudaCalcCustomNonbondedForceKernel::executeEnergy(ContextImpl& context) {
if (data.primaryKernel == this)
return calcEnergy(context, system);
return calcEnergy(context, data, system);
return 0.0;
}
......
......@@ -33,12 +33,12 @@ extern void kReduceObcGbsaBornSum(gpuContext gpu);
extern void kGenerateRandoms(gpuContext gpu);
// Main loop
extern void kCalculateCDLJObcGbsaForces1(gpuContext gpu);
extern void kCalculateCDLJForces(gpuContext gpu);
extern double kCalculateCDLJObcGbsaForces1(gpuContext gpu);
extern double kCalculateCDLJForces(gpuContext gpu);
extern void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid);
extern void kReduceObcGbsaBornForces(gpuContext gpu);
extern double kReduceObcGbsaBornForces(gpuContext gpu);
extern void kCalculateObcGbsaForces2(gpuContext gpu);
extern void kCalculateLocalForces(gpuContext gpu);
extern double kCalculateLocalForces(gpuContext gpu);
extern void kCalculateAndersenThermostat(gpuContext gpu);
extern void kReduceBornSumAndForces(gpuContext gpu);
extern void kApplyFirstShake(gpuContext gpu);
......@@ -58,6 +58,8 @@ extern void kBrownianUpdatePart2(gpuContext gpu);
// Extras
extern void kReduceForces(gpuContext gpu);
extern void kReduceLocalEnergies(gpuContext gpu, float * gpu_energies);
extern double kReduceNonbondEnergies(gpuContext gpu, float * gpu_energies);
extern void kClearBornForces(gpuContext gpu);
// Initializers
......
......@@ -315,10 +315,12 @@ struct cudaGmxSimulation {
float cellVolume; // Ewald parameter alpha (a.k.a. kappa)
float alphaEwald; // Ewald parameter alpha (a.k.a. kappa)
float factorEwald; // - 1 ( 4 * alphaEwald * alphaEwald)
float selfEnergyEwald; //
int kmaxX; // Maximum number of reciprocal vectors in the X direction
int kmaxY; // Maximum number of reciprocal vectors in the Y direction
int kmaxZ; // Maximum number of reciprocal vectors in the Z direction
float reactionFieldK; // Constant for reaction field correction
float reactionFieldC; // Constant for reaction field correction
float probeRadius; // SASA probe radius
float surfaceAreaFactor; // ACE approximation surface area factor
float electricConstant; // ACE approximation electric constant
......
......@@ -555,6 +555,7 @@ void gpuSetNonbondedCutoff(gpuContext gpu, float cutoffDistance, float solventDi
gpu->sim.nonbondedCutoff = cutoffDistance;
gpu->sim.nonbondedCutoffSqr = cutoffDistance*cutoffDistance;
gpu->sim.reactionFieldK = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0f)/(2.0f*solventDielectric+1.0f);
gpu->sim.reactionFieldC = (1.0f / cutoffDistance)*(3.0f*solventDielectric)/(2.0f*solventDielectric+1.0f);
}
extern "C"
......@@ -1440,6 +1441,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu->bRemoveCM = false;
gpu->bRecalculateBornRadii = true;
gpu->bIncludeGBSA = false;
gpu->bReduceEnergies = true;
gpuInitializeRandoms(gpu);
// To be determined later
......
......@@ -77,6 +77,7 @@ struct _gpuContext {
bool bRecalculateBornRadii;
bool bOutputBufferPerWarp;
bool bIncludeGBSA;
bool bReduceEnergies;
unsigned long seed;
SM_VERSION sm_version;
CUDPPHandle cudpp;
......
......@@ -124,19 +124,23 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
__global__ extern void kCalculateCDLJCutoffForces_12_kernel();
void kCalculateCDLJForces(gpuContext gpu)
double kCalculateCDLJForces(gpuContext gpu)
{
// printf("kCalculateCDLJCutoffForces\n");
unsigned int N = gpu->sim.nonbond_blocks * gpu->sim.nonbond_threads_per_block;
float *energies_dev;
cudaMalloc((void**) &energies_dev, sizeof(float) * N);
CUDPPResult result;
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, energies_dev);
else
kCalculateCDLJN2Forces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, energies_dev);
LAUNCHERROR("kCalculateCDLJN2Forces");
break;
case CUTOFF:
......@@ -155,10 +159,10 @@ void kCalculateCDLJForces(gpuContext gpu)
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJCutoffByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
else
kCalculateCDLJCutoffForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
LAUNCHERROR("kCalculateCDLJCutoffForces");
break;
case PERIODIC:
......@@ -177,10 +181,10 @@ void kCalculateCDLJForces(gpuContext gpu)
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJPeriodicByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
else
kCalculateCDLJPeriodicForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
LAUNCHERROR("kCalculateCDLJPeriodicForces");
break;
case EWALD:
......@@ -199,17 +203,17 @@ void kCalculateCDLJForces(gpuContext gpu)
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
else
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
if (false)
{
// O(N2) Ewald summation
kCalculateCDLJEwaldReciprocalForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateCDLJEwaldReciprocalForces");
}
}
else
{
// Fast Ewald summation
......@@ -220,4 +224,19 @@ void kCalculateCDLJForces(gpuContext gpu)
}
}
double sum = 0.0f;
if (gpu->bReduceEnergies)
{
float *energies = (float*) malloc(sizeof(float) * N);
memset(energies, 0, sizeof(float) * N);
cudaMemcpy(energies, energies_dev, sizeof(float) * N, cudaMemcpyDeviceToHost);
for (int i = 0; i < N; i++) sum += energies[i];
/* printf("\n");
for (int i = 0; i < N; i++) printf("\t%f", energies[i]);
printf("\n");
*/
}
cudaFree(energies_dev);
return sum;
}
......@@ -30,7 +30,7 @@
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
__global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit, float * gpu_energies)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
......@@ -38,6 +38,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int energyIndex = blockIdx.x * blockDim.x + threadIdx.x;
float CDLJ_energy;
float energy = 0.0f;
#ifdef USE_CUTOFF
float3* tempBuffer = (float3*) &sA[cSim.nonbond_threads_per_block];
#endif
......@@ -108,24 +111,36 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[j].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJ_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJ_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
dEdR += apos.w * psA[j].q * invR;
/* E */
CDLJ_energy += apos.w * psA[j].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJ_energy = 0.0f;
}
#endif
/* E */
energy += CDLJ_energy / 2.0;
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
......@@ -157,16 +172,35 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[j].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJ_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
#else
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
//CDLJ_energy += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
// direct space sum
// CDLJ_energy += 0.5f * apos.w * psA[j].q * erfc(alphaR) * invR;
// reciprocal space sum
CDLJ_energy = 0.5f * apos.w * psA[j].q / PI;
CDLJ_energy *= exp ( - alphaR * alphaR) / SQRT_PI;
// self Ewald Energy
// factor 2 necessary here, because energy is divided by 2 below
// CDLJ_energy += -cSim.epsfac * (psA[i].q * psA[i].q) * cSim.alphaEwald / SQRT_PI;
// if (r != 0) energy = cSim.alphaEwald;
// if (r != 0) energy = apos.w;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJ_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
dEdR += apos.w * psA[j].q * invR;
/* E */
CDLJ_energy += apos.w * psA[j].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
......@@ -176,7 +210,11 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#endif
{
dEdR = 0.0f;
/* E */
CDLJ_energy = 0.0f;
}
/* E */
energy += CDLJ_energy / 2.0;
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
......@@ -255,24 +293,36 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[tj].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJ_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJ_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
dEdR += apos.w * psA[tj].q * invR;
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJ_energy = 0.0f;
}
#endif
/* E */
energy += CDLJ_energy;
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
......@@ -310,24 +360,35 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[j].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJ_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJ_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
dEdR += apos.w * psA[j].q * invR;
/* E */
CDLJ_energy += apos.w * psA[j].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJ_energy = 0.0f;
}
#endif
/* E */
energy += CDLJ_energy;
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
......@@ -401,16 +462,24 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
sig6 = sig2 * sig2 * sig2;
eps = a.y * psA[tj].eps;
dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJ_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJ_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
dEdR += apos.w * psA[tj].q * invR;
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
......@@ -419,8 +488,12 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
if (!(excl & 0x1))
#endif
{
dEdR = 0.0f;
dEdR = 0.0f;
/* E */
CDLJ_energy = 0.0f;
}
/* E */
energy += CDLJ_energy;
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
......@@ -468,4 +541,5 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
pos++;
}
gpu_energies[energyIndex] = energy;
}
......@@ -104,10 +104,14 @@ extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel();
extern __global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int*);
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
double kCalculateCDLJObcGbsaForces1(gpuContext gpu)
{
// printf("kCalculateCDLJObcGbsaForces1\n");
unsigned int N = gpu->sim.nonbond_blocks * gpu->sim.nonbond_threads_per_block;
float *energies_dev;
cudaMalloc((void**) &energies_dev, sizeof(float) * N);
// check if Born radii need to be calculated
kClearBornForces(gpu);
......@@ -122,10 +126,10 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, energies_dev);
else
kCalculateCDLJObcGbsaN2Forces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, energies_dev);
LAUNCHERROR("kCalculateCDLJObcGbsaN2Forces1");
break;
case CUTOFF:
......@@ -149,10 +153,10 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
else
kCalculateCDLJObcGbsaCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
LAUNCHERROR("kCalculateCDLJObcGbsaCutoffForces1");
break;
case PERIODIC:
......@@ -176,11 +180,24 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
else
kCalculateCDLJObcGbsaPeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, energies_dev);
LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1");
break;
}
double sum = 0.0f;
if (gpu->bReduceEnergies)
{
float *energies = (float*) malloc(sizeof(float) * N);
// double sum = kReduceNonbondEnergies(gpu, energies_dev);
// printf("GPU____Sum: %f\n", sum);
cudaMemcpy(energies, energies_dev, sizeof(float) * N, cudaMemcpyDeviceToHost);
for (int i = 0; i < N; i++) sum += energies[i];
// printf("ControlSum: %f\n", sum);
}
cudaFree(energies_dev);
return sum;
}
......@@ -30,7 +30,7 @@
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
__global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit, float * gpu_energies)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
......@@ -38,6 +38,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int energyIndex = blockIdx.x * blockDim.x + threadIdx.x;
float CDLJObcGbsa_energy;
float energy = 0.0f;
#ifdef USE_CUTOFF
float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
#endif
......@@ -98,10 +101,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * psA[j].eps;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * psA[j].q * invR;
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
/* E */
CDLJObcGbsa_energy += factorX;
#endif
dEdR *= invR * invR;
......@@ -115,13 +125,21 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[j].br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[j].q) / denominator;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
}
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#endif
/* E */
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy / 2.0f;
}
// Add Forces
dx *= dEdR;
dy *= dEdR;
......@@ -156,15 +174,24 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * psA[j].eps;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * psA[j].q * invR;
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
/* E */
CDLJObcGbsa_energy += factorX;
#endif
dEdR *= invR * invR;
if (!(excl & 0x1))
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
// ObcGbsaForce1 part
......@@ -177,18 +204,28 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[j].br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[j].q) / denominator;
#if defined USE_PERIODIC
if (i >= cSim.atoms || x+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#elif defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#endif
/* E */
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy / 2.0f;
}
// Add Forces
dx *= dEdR;
dy *= dEdR;
......@@ -271,10 +308,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * psA[tj].eps;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * psA[tj].q * invR;
float factorX = apos.w * psA[tj].q * invR;
dEdR += factorX;
/* E */
CDLJObcGbsa_energy += factorX;
#endif
dEdR *= invR * invR;
......@@ -289,13 +333,21 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[tj].q) / denominator;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#endif
/* E */
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy;
}
// Add forces
dx *= dEdR;
dy *= dEdR;
......@@ -336,10 +388,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * psA[j].eps;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * psA[j].q * invR;
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
/* E */
CDLJObcGbsa_energy += factorX;
#endif
dEdR *= invR * invR;
......@@ -353,6 +412,8 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[j].br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[j].q) / denominator;
// Sum the Born forces.
......@@ -371,9 +432,15 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#endif
/* E */
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy;
}
// Add forces
dx *= dEdR;
dy *= dEdR;
......@@ -446,15 +513,24 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * psA[tj].eps;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * psA[tj].q * invR;
float factorX = apos.w * psA[tj].q * invR;
dEdR += factorX;
/* E */
CDLJObcGbsa_energy += factorX;
#endif
dEdR *= invR * invR;
if (!(excl & 0x1))
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
// ObcGbsaForce1 part
......@@ -468,18 +544,28 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[tj].q) / denominator;
#if defined USE_PERIODIC
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#elif defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#endif
/* E */
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy;
}
// Add forces
dx *= dEdR;
dy *= dEdR;
......@@ -529,4 +615,5 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
}
pos++;
}
gpu_energies[energyIndex] = energy;
}
......@@ -63,6 +63,13 @@ static __constant__ cudaGmxSimulation cSim;
dEdR = param.y * deltaIdeal; \
}
#define GETENERGYGIVENANGLECOSINE(cosine, param, dEdR) \
{ \
float angle = acos(cosine); \
float deltaIdeal = angle - (param.x * (3.14159265f / 180.0f)); \
dEdR = param.y * deltaIdeal * deltaIdeal; \
}
#define GETANGLEBETWEENTWOVECTORS(v1, v2, angle) \
{ \
float dp; \
......@@ -109,11 +116,14 @@ void GetCalculateLocalForcesSim(gpuContext gpu)
}
__global__ void kCalculateLocalForces_kernel()
__global__ void kCalculateLocalForces_kernel(float * gpu_energies)
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int energyIndex = pos;
Vectors* A = &sV[threadIdx.x];
float energy = 0.0f;
while (pos < cSim.bond_offset)
{
if (pos < cSim.bonds)
......@@ -128,6 +138,7 @@ __global__ void kCalculateLocalForces_kernel()
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float deltaIdeal = r - bond.x;
/* E */ energy += bond.y * deltaIdeal * deltaIdeal / 2.0;
float dEdR = bond.y * deltaIdeal;
dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f;
// printf("D: %11.4f %11.4f %11.4f %11.4f %11.4f %11.4f\n", dx, dy, dz, r, deltaIdeal, dEdR);
......@@ -149,7 +160,7 @@ __global__ void kCalculateLocalForces_kernel()
}
pos += blockDim.x * gridDim.x;
}
while (pos < cSim.bond_angle_offset)
{
unsigned int pos1 = pos - cSim.bond_offset;
......@@ -174,6 +185,11 @@ __global__ void kCalculateLocalForces_kernel()
float r23 = DOT3(A->v1, A->v1); // dx2 * dx2 + dy2 * dy2 + dz2 * dz2;
float dot = DOT3(A->v0, A->v1); // dx1 * dx2 + dy1 * dy2 + dz1 * dz2;
float cosine = dot / sqrt(r21 * r23);
float angle_energy;
/* E */ GETENERGYGIVENANGLECOSINE(cosine, bond_angle, angle_energy);
energy += angle_energy / 2.0;
float dEdR;
GETPREFACTORSGIVENANGLECOSINE(cosine, bond_angle, dEdR);
//printf("%11.4f %11.4f\n", cosine, dEdR);
......@@ -211,7 +227,7 @@ __global__ void kCalculateLocalForces_kernel()
}
pos += blockDim.x * gridDim.x;
}
while (pos < cSim.dihedral_offset)
{
unsigned int pos1 = pos - cSim.bond_angle_offset;
......@@ -236,6 +252,19 @@ __global__ void kCalculateLocalForces_kernel()
GETDIHEDRALANGLEBETWEENTHREEVECTORS(A->v0, A->v1, A->v2, A->v0, cp0, cp1, dihedralAngle);
float4 dihedral = cSim.pDihedralParameter[pos1];
float deltaAngle = dihedral.z * dihedralAngle - (dihedral.y * 3.14159265f / 180.0f);
// ATTENTION: This section leads to a divergent deltaAngle values wrt
// forces and energies. We separate the case dihedral.z = n = 0, which
// is treated by the calculation of energies via a harmonic potential
/* E */ if (dihedral.z) energy += dihedral.x * (1.0f + cos(deltaAngle));
/* E */ else
{
float deltaAngle = dihedralAngle - dihedral.y;
if (deltaAngle < -M_PI) deltaAngle += 2.0 * M_PI;
else if (deltaAngle > M_PI) deltaAngle -= 2.0 * M_PI;
energy += dihedral.x * deltaAngle * deltaAngle;
}
float sinDeltaAngle = sin(deltaAngle);
float dEdAngle = -dihedral.x * dihedral.z * sinDeltaAngle;
float normCross1 = DOT3(cp0, cp0);
......@@ -293,10 +322,11 @@ __global__ void kCalculateLocalForces_kernel()
force.z += -internalF3.z - s.z;
cSim.pForce4[offset] = force;
//printf("%4d - 2: %9.4f %9.4f %9.4f\n", pos1, cSim.pForce[offset], cSim.pForce[offset + cSim.stride], cSim.pForce[offset + cSim.stride2]);
}
}
pos += blockDim.x * gridDim.x;
}
// Ryckaert Bellemans dihedrals
while (pos < cSim.rb_dihedral_offset)
{
unsigned int pos1 = pos - cSim.dihedral_offset;
......@@ -336,17 +366,25 @@ __global__ void kCalculateLocalForces_kernel()
float2 dihedral2 = cSim.pRbDihedralParameter2[pos1];
float cosFactor = cosPhi;
float dEdAngle = -dihedral1.y;
/* E */ float rb_energy = dihedral1.x;
rb_energy += dihedral1.y * cosFactor;
// printf("%4d - 1: %9.4f %9.4f\n", pos1, dEdAngle, 1.0f);
dEdAngle -= 2.0f * dihedral1.z * cosFactor;
// printf("%4d - 2: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 3.0f * dihedral1.w * cosFactor;
// printf("%4d - 3: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
rb_energy += dihedral1.z * cosFactor;
// printf("%4d - 3: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 4.0f * dihedral2.x * cosFactor;
// printf("%4d - 4: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
rb_energy += dihedral1.w * cosFactor;
// printf("%4d - 4: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 5.0f * dihedral2.y * cosFactor;
rb_energy += dihedral2.x * cosFactor;
rb_energy += dihedral2.y * cosFactor * cosPhi;
/* E */ energy += rb_energy;
// printf("%4d - 5: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
dEdAngle *= sin(dihedralAngle);
// printf("%4d - f: %9.4f\n", pos1, dEdAngle);
......@@ -430,6 +468,10 @@ __global__ void kCalculateLocalForces_kernel()
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = LJ14.x * (12.0f * sig6 - 6.0f) * sig6;
/* E */
energy = LJ14.x * (sig6 - 1.0f) * sig6;
energy += LJ14.z * inverseR;
dEdR += LJ14.z * inverseR;
dEdR *= inverseR * inverseR;
unsigned int offsetA = atom.x + atom.z * cSim.stride;
......@@ -453,6 +495,7 @@ __global__ void kCalculateLocalForces_kernel()
}
else if (cSim.nonbondedMethod == CUTOFF)
{
float LJ14_energy;
while (pos < cSim.LJ14_offset)
{
unsigned int pos1 = pos - cSim.rb_dihedral_offset;
......@@ -471,13 +514,21 @@ __global__ void kCalculateLocalForces_kernel()
float sig2 = inverseR * LJ14.y;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = LJ14.x * (12.0f * sig6 - 6.0f) * sig6;
float dEdR = LJ14.x * (12.0f * sig6 - 6.0f) * sig6;
/* E */
LJ14_energy = LJ14.x * (sig6 - 1.0f) * sig6;
LJ14_energy += LJ14.z * (inverseR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
dEdR += LJ14.z * (inverseR - 2.0f * cSim.reactionFieldK * r2);
dEdR *= inverseR * inverseR;
if (r2 > cSim.nonbondedCutoffSqr)
{
{
dEdR = 0.0f;
/* E */
LJ14_energy = 0.0f;
}
/* E */
energy += LJ14_energy;
unsigned int offsetA = atom.x + atom.z * cSim.stride;
unsigned int offsetB = atom.y + atom.w * cSim.stride;
float4 forceA = cSim.pForce4[offsetA];
......@@ -499,6 +550,7 @@ __global__ void kCalculateLocalForces_kernel()
}
else if (cSim.nonbondedMethod == PERIODIC)
{
float LJ14_energy;
while (pos < cSim.LJ14_offset)
{
unsigned int pos1 = pos - cSim.rb_dihedral_offset;
......@@ -521,12 +573,21 @@ __global__ void kCalculateLocalForces_kernel()
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = LJ14.x * (12.0f * sig6 - 6.0f) * sig6;
/* E */
LJ14_energy = LJ14.x * (sig6 - 1.0f) * sig6;
LJ14_energy += LJ14.z * (inverseR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
dEdR += LJ14.z * (inverseR - 2.0f * cSim.reactionFieldK * r2);
dEdR *= inverseR * inverseR;
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
LJ14_energy = 0.0f;
}
/* E */
energy += LJ14_energy;
unsigned int offsetA = atom.x + atom.z * cSim.stride;
unsigned int offsetB = atom.y + atom.w * cSim.stride;
float4 forceA = cSim.pForce4[offsetA];
......@@ -546,13 +607,30 @@ __global__ void kCalculateLocalForces_kernel()
pos += blockDim.x * gridDim.x;
}
}
gpu_energies[energyIndex] = energy;
}
void kCalculateLocalForces(gpuContext gpu)
double kCalculateLocalForces(gpuContext gpu)
{
// printf("kCalculateLocalForces\n");
kCalculateLocalForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block, gpu->sim.localForces_threads_per_block * sizeof(Vectors)>>>();
unsigned int N = gpu->sim.blocks * gpu->sim.localForces_threads_per_block;
float *energies_dev;
cudaMalloc((void**) &energies_dev, sizeof(float) * N);
kCalculateLocalForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block, gpu->sim.localForces_threads_per_block * sizeof(Vectors)>>>(energies_dev);
LAUNCHERROR("kCalculateLocalForces");
double sum = 0.0f;
if (gpu->bReduceEnergies)
{
// sum = kReduceLocalEnergies(gpu, energies_dev);
float *energies = (float*) malloc(sizeof(float) * N);
memset(energies, 0, sizeof(float) * N);
cudaMemcpy(energies, energies_dev, sizeof(float) * N, cudaMemcpyDeviceToHost);
for (int i = 0; i < N; i++) sum += energies[i];
}
cudaFree(energies_dev);
return sum;
}
......@@ -254,10 +254,57 @@ void kReduceForces(gpuContext gpu)
LAUNCHERROR("kReduceForces");
}
__global__ void kReduceEnergies_kernel(float * gpu_energies)
{
// GPU energy reduction algorithm goes here
// unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
}
double kReduceLocalEnergies(gpuContext gpu, float * gpu_energies)
{
// printf("kReduceLocalEnergies\n");
unsigned int N = gpu->sim.blocks * gpu->sim.localForces_threads_per_block;
float *energies = (float*) malloc(sizeof(float) * N);
//kReduceEnergies_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block>>>(gpu_energies);
LAUNCHERROR("kReduceLocalEnergies");
cudaMemcpy(energies, gpu_energies, sizeof(float) * N, cudaMemcpyDeviceToHost);
double sum = 0.0f;
for (int i = 0; i < N; i++) sum += energies[i];
return sum;
}
double kReduceNonbondEnergies(gpuContext gpu, float * gpu_energies)
{
// printf("kReduceNonbondEnergies\n");
unsigned int N = gpu->sim.nonbond_blocks * gpu->sim.nonbond_threads_per_block;
float *energies = (float*) malloc(sizeof(float) * N);
//kReduceEnergies_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>(gpu_energies);
LAUNCHERROR("kReduceNonbondEnergies");
cudaMemcpy(energies, gpu_energies, sizeof(float) * N, cudaMemcpyDeviceToHost);
double sum = 0.0f;
for (int i = 0; i < N; i++) sum += energies[i];
return sum;
}
__global__ void kReduceObcGbsaBornForces_kernel()
double kReduceObcGbsaBornEnergies(gpuContext gpu, float * gpu_energies)
{
// printf("kReduceObcGbsaBornEnergies\n");
unsigned int N = gpu->sim.blocks * gpu->sim.bf_reduce_threads_per_block;
float *energies = (float*) malloc(sizeof(float) * N);
//kReduceEnergies_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>(gpu_energies);
LAUNCHERROR("kReduceObcGbsaBornEnergies");
cudaMemcpy(energies, gpu_energies, sizeof(float) * N, cudaMemcpyDeviceToHost);
double sum = 0.0f;
for (int i = 0; i < N; i++) sum += energies[i];
return sum;
}
__global__ void kReduceObcGbsaBornForces_kernel(float * gpu_energies)
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
unsigned int energyIndex = pos;
float energy = 0.0f;
while (pos < cSim.atoms)
{
float bornRadius = cSim.pBornRadii[pos];
......@@ -299,19 +346,40 @@ __global__ void kReduceObcGbsaBornForces_kernel()
float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
totalForce += saTerm / bornRadius; // 1.102 == Temp mysterious fudge factor, FIX FIX FIX
/* E */
energy += saTerm;
totalForce *= bornRadius * bornRadius * obcChain;
pFt = cSim.pBornForce + pos;
*pFt = totalForce;
pos += gridDim.x * blockDim.x;
}
/* E */
// correct for surface area factor of -6
gpu_energies[energyIndex] = energy / -6.0f;
}
void kReduceObcGbsaBornForces(gpuContext gpu)
double kReduceObcGbsaBornForces(gpuContext gpu)
{
unsigned int N = gpu->sim.blocks * gpu->sim.bf_reduce_threads_per_block;
float *energies_dev;
cudaMalloc((void**) &energies_dev, sizeof(float) * N);
//printf("kReduceObcGbsaBornForces\n");
kReduceObcGbsaBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
kReduceObcGbsaBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>(energies_dev);
LAUNCHERROR("kReduceObcGbsaBornForces");
float sum = 0.0f;
if (gpu->bReduceEnergies)
{
// double sum = kReduceObcGbsaBornEnergies(gpu, energies_dev);
float *energies = (float*) malloc(sizeof(float) * N);
cudaMemcpy(energies, energies_dev, sizeof(float) * N, cudaMemcpyDeviceToHost);
for (int i = 0; i < N; i++) sum += energies[i];
}
cudaFree(energies_dev);
return sum;
}
......@@ -257,6 +257,7 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
if( totalEnergy )
*totalEnergy -= selfEwaldEnergy;
// printf("- selfEwaldEnergy: %f aEwald %f p1 %f\n", -selfEwaldEnergy, alphaEwald, atomParameters[atomID][QIndex]);
if( energyByAtom ){
energyByAtom[atomID] -= selfEwaldEnergy;
......@@ -266,9 +267,9 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
// **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
RealOpenMM tempStore = *totalEnergy;
// PME
if (pme) {
pme_t pmedata; /* abstract handle for PME data */
int ngrid[3];
......@@ -409,6 +410,8 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
SimTKOpenMMLog::printError( message );
}
// printf("recipEnergy: %f\n", *totalEnergy - tempStore);
// **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES
// **************************************************************************************
......@@ -448,10 +451,12 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
// accumulate energies
realSpaceEwaldEnergy = atomParameters[ii][QIndex]*atomParameters[jj][QIndex]*inverseR*erfc(alphaR);
// printf("realSpaceEwald: %f p1 %f p2 %f invR %f erfc(aR) %f aR %f\n", realSpaceEwaldEnergy, atomParameters[ii][QIndex], atomParameters[jj][QIndex], inverseR, erfc(alphaR), alphaR);
vdwEnergy = eps*(sig6-one)*sig6;
if( totalEnergy )
*totalEnergy += realSpaceEwaldEnergy + vdwEnergy;
if( energyByAtom ){
energyByAtom[ii] += realSpaceEwaldEnergy + vdwEnergy;
......
......@@ -416,7 +416,7 @@ int CpuObc::computeBornEnergyForces( RealOpenMM* bornRadii, RealOpenMM** atomCoo
if( includeAceApproximation() ){
computeAceNonPolarForce( obcParameters, bornRadii, &obcEnergy, bornForces );
}
// ---------------------------------------------------------------------------------------
// first main loop
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment