Commit 1acb7fa9 authored by Peter Eastman's avatar Peter Eastman
Browse files

Minor improvements to speed and accuracy

parent 072fc120
......@@ -1740,49 +1740,49 @@ void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT
gpu->sim.tau = tau;
gpu->sim.T = temperature;
gpu->sim.kT = BOLTZ * gpu->sim.T;
float GDT = gpu->sim.deltaT / gpu->sim.tau;
float EPH = exp(0.5f * GDT);
float EMH = exp(-0.5f * GDT);
float EP = exp(GDT);
float EM = exp(-GDT);
float B, C, D;
if (GDT >= 0.1f)
double GDT = gpu->sim.deltaT / gpu->sim.tau;
double EPH = exp(0.5 * GDT);
double EMH = exp(-0.5 * GDT);
double EP = exp(GDT);
double EM = exp(-GDT);
double B, C, D;
if (GDT >= 0.1)
{
float term1 = EPH - 1.0f;
double term1 = EPH - 1.0;
term1 *= term1;
B = GDT * (EP - 1.0f) - 4.0f * term1;
C = GDT - 3.0f + 4.0f * EMH - EM;
D = 2.0f - EPH - EMH;
B = GDT * (EP - 1.0) - 4.0 * term1;
C = GDT - 3.0 + 4.0 * EMH - EM;
D = 2.0 - EPH - EMH;
}
else
{
float term1 = 0.5f * GDT;
float term2 = term1 * term1;
float term4 = term2 * term2;
float third = 1.0f / 3.0f;
float o7_9 = 7.0f / 9.0f;
float o1_12 = 1.0f / 12.0f;
float o17_90 = 17.0f / 90.0f;
float o7_30 = 7.0f / 30.0f;
float o31_1260 = 31.0f / 1260.0f;
float o_360 = 1.0f / 360.0f;
double term1 = 0.5 * GDT;
double term2 = term1 * term1;
double term4 = term2 * term2;
double third = 1.0 / 3.0;
double o7_9 = 7.0 / 9.0;
double o1_12 = 1.0 / 12.0;
double o17_90 = 17.0 / 90.0;
double o7_30 = 7.0 / 30.0;
double o31_1260 = 31.0 / 1260.0;
double o_360 = 1.0 / 360.0;
B = term4 * (third + term1 * (third + term1 * (o17_90 + term1 * o7_9)));
C = term2 * term1 * (2.0f * third + term1 * (-0.5f + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
D = term2 * (-1.0f + term2 * (-o1_12 - term2 * o_360));
}
float DOverTauC = D / (gpu->sim.tau * C);
float TauOneMinusEM = gpu->sim.tau * (1.0f-EM);
float TauDOverEMMinusOne = gpu->sim.tau * D / (EM - 1.0f);
float fix1 = gpu->sim.tau * (EPH - EMH);
if (fix1 == 0.0f)
C = term2 * term1 * (2.0 * third + term1 * (-0.5 + term1 * (o7_30 + term1 * (-o1_12 + term1 * o31_1260))));
D = term2 * (-1.0 + term2 * (-o1_12 - term2 * o_360));
}
double DOverTauC = D / (gpu->sim.tau * C);
double TauOneMinusEM = gpu->sim.tau * (1.0-EM);
double TauDOverEMMinusOne = gpu->sim.tau * D / (EM - 1.0);
double fix1 = gpu->sim.tau * (EPH - EMH);
if (fix1 == 0.0)
fix1 = deltaT;
float oneOverFix1 = 1.0f / fix1;
float V = sqrt(gpu->sim.kT * (1.0f - EM));
float X = gpu->sim.tau * sqrt(gpu->sim.kT * C);
float Yv = sqrt(gpu->sim.kT * B / C);
float Yx = gpu->sim.tau * sqrt(gpu->sim.kT * B / (1.0f - EM));
double oneOverFix1 = 1.0 / fix1;
double V = sqrt(gpu->sim.kT * (1.0 - EM));
double X = gpu->sim.tau * sqrt(gpu->sim.kT * C);
double Yv = sqrt(gpu->sim.kT * B / C);
double Yx = gpu->sim.tau * sqrt(gpu->sim.kT * B / (1.0 - EM));
(*gpu->psLangevinParameters)[0] = EM;
(*gpu->psLangevinParameters)[1] = EM;
(*gpu->psLangevinParameters)[2] = DOverTauC;
......@@ -2192,7 +2192,6 @@ void gpuBuildExclusionList(gpuContext gpu)
for (int atom2 = 0; atom2 < (int)atoms; ++atom2)
{
int y = atom2/grid;
int index = x*atoms+y*grid+offset1;
int offset2 = atom2-y*grid;
if (x >= y)
{
......
......@@ -72,9 +72,9 @@ __global__ void kApplyFirstSettle_kernel()
float4 xp1 = cSim.pPosqP[atomID.y];
float4 apos2 = cSim.pOldPosq[atomID.z];
float4 xp2 = cSim.pPosqP[atomID.z];
float m0 = 1.0/cSim.pVelm4[atomID.x].w;
float m1 = 1.0/cSim.pVelm4[atomID.y].w;
float m2 = 1.0/cSim.pVelm4[atomID.z].w;
float m0 = 1.0f/cSim.pVelm4[atomID.x].w;
float m1 = 1.0f/cSim.pVelm4[atomID.y].w;
float m2 = 1.0f/cSim.pVelm4[atomID.z].w;
// Translate the molecule to the origin to improve numerical precision.
......@@ -150,23 +150,23 @@ __global__ void kApplyFirstSettle_kernel()
// --- Step2 A2' ---
float rc = 0.5*params.y;
float rc = 0.5f*params.y;
float rb = sqrt(params.x*params.x-rc*rc);
float ra = rb*(m1+m2)/totalMass;
rb -= ra;
float sinphi = za1d / ra;
float cosphi = sqrt(1.0 - sinphi*sinphi);
float cosphi = sqrt(1.0f - sinphi*sinphi);
float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi);
float cospsi = sqrt(1.0 - sinpsi*sinpsi);
float cospsi = sqrt(1.0f - sinpsi*sinpsi);
float ya2d = ra * cosphi;
float xb2d = - rc * cospsi;
float yb2d = - rb * cosphi - rc *sinpsi * sinphi;
float yc2d = - rb * cosphi + rc *sinpsi * sinphi;
float xb2d2 = xb2d * xb2d;
float hh2 = 4.0 * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d);
float deltx = 2.0 * xb2d + sqrt ( 4.0 * xb2d2 - hh2 + params.y*params.y );
xb2d -= deltx * 0.5;
float hh2 = 4.0f * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d);
float deltx = 2.0f * xb2d + sqrt ( 4.0f * xb2d2 - hh2 + params.y*params.y );
xb2d -= deltx * 0.5f;
// --- Step3 al,be,ga ---
......@@ -179,7 +179,7 @@ __global__ void kApplyFirstSettle_kernel()
// --- Step4 A3' ---
float costhe = sqrt (1.0 - sinthe * sinthe );
float costhe = sqrt (1.0f - sinthe * sinthe );
float xa3d = - ya2d * sinthe;
float ya3d = ya2d * costhe;
float za3d = za1d;
......@@ -244,9 +244,9 @@ __global__ void kApplySecondSettle_kernel()
float4 xp1 = cSim.pPosq[atomID.y];
float4 apos2 = cSim.pOldPosq[atomID.z];
float4 xp2 = cSim.pPosq[atomID.z];
float m0 = 1.0/cSim.pVelm4[atomID.x].w;
float m1 = 1.0/cSim.pVelm4[atomID.y].w;
float m2 = 1.0/cSim.pVelm4[atomID.z].w;
float m0 = 1.0f/cSim.pVelm4[atomID.x].w;
float m1 = 1.0f/cSim.pVelm4[atomID.y].w;
float m2 = 1.0f/cSim.pVelm4[atomID.z].w;
float xb0 = apos1.x-apos0.x;
......@@ -308,23 +308,23 @@ __global__ void kApplySecondSettle_kernel()
// --- Step2 A2' ---
float rc = 0.5*params.y;
float rc = 0.5f*params.y;
float rb = sqrt(params.x*params.x-rc*rc);
float ra = rb*(m1+m2)/totalMass;
rb -= ra;
float sinphi = za1d / ra;
float cosphi = sqrt(1.0 - sinphi*sinphi);
float cosphi = sqrt(1.0f - sinphi*sinphi);
float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi);
float cospsi = sqrt(1.0 - sinpsi*sinpsi);
float cospsi = sqrt(1.0f - sinpsi*sinpsi);
float ya2d = ra * cosphi;
float xb2d = - rc * cospsi;
float yb2d = - rb * cosphi - rc *sinpsi * sinphi;
float yc2d = - rb * cosphi + rc *sinpsi * sinphi;
float xb2d2 = xb2d * xb2d;
float hh2 = 4.0 * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d);
float deltx = 2.0 * xb2d + sqrt ( 4.0 * xb2d2 - hh2 + params.y*params.y );
xb2d -= deltx * 0.5;
float hh2 = 4.0f * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d);
float deltx = 2.0f * xb2d + sqrt ( 4.0f * xb2d2 - hh2 + params.y*params.y );
xb2d -= deltx * 0.5f;
// --- Step3 al,be,ga ---
......@@ -337,7 +337,7 @@ __global__ void kApplySecondSettle_kernel()
// --- Step4 A3' ---
float costhe = sqrt (1.0 - sinthe * sinthe );
float costhe = sqrt (1.0f - sinthe * sinthe );
float xa3d = - ya2d * sinthe;
float ya3d = ya2d * costhe;
float za3d = za1d;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment