Commit 470c9b7f authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Renamed math functions in Cuda kernels to single-precision equivalent [cos() -> cosf()]

parent fed50628
...@@ -57,9 +57,9 @@ __global__ void kScaleAtomCoordinates_kernel(float scale, int numMolecules, floa ...@@ -57,9 +57,9 @@ __global__ void kScaleAtomCoordinates_kernel(float scale, int numMolecules, floa
// Move it into the first periodic box. // Move it into the first periodic box.
int xcell = (int) floor(center.x*invPeriodicBoxSize.x); int xcell = (int) floorf(center.x*invPeriodicBoxSize.x);
int ycell = (int) floor(center.y*invPeriodicBoxSize.y); int ycell = (int) floorf(center.y*invPeriodicBoxSize.y);
int zcell = (int) floor(center.z*invPeriodicBoxSize.z); int zcell = (int) floorf(center.z*invPeriodicBoxSize.z);
float3 delta = make_float3(xcell*periodicBoxSize.x, ycell*periodicBoxSize.y, zcell*periodicBoxSize.z); float3 delta = make_float3(xcell*periodicBoxSize.x, ycell*periodicBoxSize.y, zcell*periodicBoxSize.z);
center.x -= delta.x; center.x -= delta.x;
center.y -= delta.y; center.y -= delta.y;
......
...@@ -94,7 +94,7 @@ void kGenerateRandoms_kernel() ...@@ -94,7 +94,7 @@ void kGenerateRandoms_kernel()
state.y ^= state.y << 13; state.y ^= state.y << 13;
state.y ^= state.y >> 17; state.y ^= state.y >> 17;
state.y ^= state.y << 5; state.y ^= state.y << 5;
x1 = sqrt(-2.0f * log(x1)); x1 = sqrtf(-2.0f * logf(x1));
k = (state.z >> 2) + (state.w >> 3) + (carry >> 2); k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
m = state.w + state.w + state.z + carry; m = state.w + state.w + state.z + carry;
state.z = state.w; state.z = state.w;
...@@ -106,7 +106,7 @@ void kGenerateRandoms_kernel() ...@@ -106,7 +106,7 @@ void kGenerateRandoms_kernel()
state.y ^= state.y << 13; state.y ^= state.y << 13;
state.y ^= state.y >> 17; state.y ^= state.y >> 17;
state.y ^= state.y << 5; state.y ^= state.y << 5;
sRand[pos1].x = x1 * cos(2.0f * 3.14159265f * x2); sRand[pos1].x = x1 * cosf(2.0f * 3.14159265f * x2);
k = (state.z >> 2) + (state.w >> 3) + (carry >> 2); k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
m = state.w + state.w + state.z + carry; m = state.w + state.w + state.z + carry;
state.z = state.w; state.z = state.w;
...@@ -117,7 +117,7 @@ void kGenerateRandoms_kernel() ...@@ -117,7 +117,7 @@ void kGenerateRandoms_kernel()
state.y ^= state.y << 13; state.y ^= state.y << 13;
state.y ^= state.y >> 17; state.y ^= state.y >> 17;
state.y ^= state.y << 5; state.y ^= state.y << 5;
x3 = sqrt(-2.0f * log(x3)); x3 = sqrtf(-2.0f * logf(x3));
k = (state.z >> 2) + (state.w >> 3) + (carry >> 2); k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
m = state.w + state.w + state.z + carry; m = state.w + state.w + state.z + carry;
state.z = state.w; state.z = state.w;
...@@ -129,7 +129,7 @@ void kGenerateRandoms_kernel() ...@@ -129,7 +129,7 @@ void kGenerateRandoms_kernel()
state.y ^= state.y << 13; state.y ^= state.y << 13;
state.y ^= state.y >> 17; state.y ^= state.y >> 17;
state.y ^= state.y << 5; state.y ^= state.y << 5;
sRand[pos1].y = x3 * cos(2.0f * 3.14159265f * x4); sRand[pos1].y = x3 * cosf(2.0f * 3.14159265f * x4);
k = (state.z >> 2) + (state.w >> 3) + (carry >> 2); k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
m = state.w + state.w + state.z + carry; m = state.w + state.w + state.z + carry;
state.z = state.w; state.z = state.w;
...@@ -140,14 +140,14 @@ void kGenerateRandoms_kernel() ...@@ -140,14 +140,14 @@ void kGenerateRandoms_kernel()
state.y ^= state.y << 13; state.y ^= state.y << 13;
state.y ^= state.y >> 17; state.y ^= state.y >> 17;
state.y ^= state.y << 5; state.y ^= state.y << 5;
x5 = sqrt(-2.0f * log(x5)); x5 = sqrtf(-2.0f * logf(x5));
k = (state.z >> 2) + (state.w >> 3) + (carry >> 2); k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
m = state.w + state.w + state.z + carry; m = state.w + state.w + state.z + carry;
state.z = state.w; state.z = state.w;
state.w = m; state.w = m;
carry = k >> 30; carry = k >> 30;
float x6 = (float)(state.x + state.y + state.w) / (float)0xffffffff; float x6 = (float)(state.x + state.y + state.w) / (float)0xffffffff;
sRand[pos1].z = x5 * cos(2.0f * 3.14159265f * x6); sRand[pos1].z = x5 * cosf(2.0f * 3.14159265f * x6);
pos1 += blockDim.x; pos1 += blockDim.x;
} }
......
...@@ -131,9 +131,9 @@ void kApplySettle_kernel() ...@@ -131,9 +131,9 @@ void kApplySettle_kernel()
float yaksYd = zaksZd*xaksXd - xaksZd*zaksXd; float yaksYd = zaksZd*xaksXd - xaksZd*zaksXd;
float zaksYd = xaksZd*yaksXd - yaksZd*xaksXd; float zaksYd = xaksZd*yaksXd - yaksZd*xaksXd;
float axlng = sqrt(xaksXd * xaksXd + yaksXd * yaksXd + zaksXd * zaksXd); float axlng = sqrtf(xaksXd * xaksXd + yaksXd * yaksXd + zaksXd * zaksXd);
float aylng = sqrt(xaksYd * xaksYd + yaksYd * yaksYd + zaksYd * zaksYd); float aylng = sqrtf(xaksYd * xaksYd + yaksYd * yaksYd + zaksYd * zaksYd);
float azlng = sqrt(xaksZd * xaksZd + yaksZd * yaksZd + zaksZd * zaksZd); float azlng = sqrtf(xaksZd * xaksZd + yaksZd * yaksZd + zaksZd * zaksZd);
float trns11 = xaksXd / axlng; float trns11 = xaksXd / axlng;
float trns21 = yaksXd / axlng; float trns21 = yaksXd / axlng;
float trns31 = zaksXd / axlng; float trns31 = zaksXd / axlng;
...@@ -159,13 +159,13 @@ void kApplySettle_kernel() ...@@ -159,13 +159,13 @@ void kApplySettle_kernel()
// --- Step2 A2' --- // --- Step2 A2' ---
float rc = 0.5f*params.y; float rc = 0.5f*params.y;
float rb = sqrt(params.x*params.x-rc*rc); float rb = sqrtf(params.x*params.x-rc*rc);
float ra = rb*(m1+m2)/totalMass; float ra = rb*(m1+m2)/totalMass;
rb -= ra; rb -= ra;
float sinphi = za1d / ra; float sinphi = za1d / ra;
float cosphi = sqrt(1.0f - sinphi*sinphi); float cosphi = sqrtf(1.0f - sinphi*sinphi);
float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi); float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi);
float cospsi = sqrt(1.0f - sinpsi*sinpsi); float cospsi = sqrtf(1.0f - sinpsi*sinpsi);
float ya2d = ra * cosphi; float ya2d = ra * cosphi;
float xb2d = - rc * cospsi; float xb2d = - rc * cospsi;
...@@ -173,7 +173,7 @@ void kApplySettle_kernel() ...@@ -173,7 +173,7 @@ void kApplySettle_kernel()
float yc2d = - rb * cosphi + rc *sinpsi * sinphi; float yc2d = - rb * cosphi + rc *sinpsi * sinphi;
float xb2d2 = xb2d * xb2d; float xb2d2 = xb2d * xb2d;
float hh2 = 4.0f * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d); float hh2 = 4.0f * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d);
float deltx = 2.0f * xb2d + sqrt ( 4.0f * xb2d2 - hh2 + params.y*params.y ); float deltx = 2.0f * xb2d + sqrtf( 4.0f * xb2d2 - hh2 + params.y*params.y );
xb2d -= deltx * 0.5f; xb2d -= deltx * 0.5f;
// --- Step3 al,be,ga --- // --- Step3 al,be,ga ---
...@@ -183,11 +183,11 @@ void kApplySettle_kernel() ...@@ -183,11 +183,11 @@ void kApplySettle_kernel()
float gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d; float gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d;
float al2be2 = alpa * alpa + beta * beta; float al2be2 = alpa * alpa + beta * beta;
float sinthe = ( alpa*gama - beta * sqrt ( al2be2 - gama * gama ) ) / al2be2; float sinthe = ( alpa*gama - beta * sqrtf( al2be2 - gama * gama ) ) / al2be2;
// --- Step4 A3' --- // --- Step4 A3' ---
float costhe = sqrt (1.0f - sinthe * sinthe ); float costhe = sqrtf(1.0f - sinthe * sinthe );
float xa3d = - ya2d * sinthe; float xa3d = - ya2d * sinthe;
float ya3d = ya2d * costhe; float ya3d = ya2d * costhe;
float za3d = za1d; float za3d = za1d;
......
...@@ -96,7 +96,7 @@ void RNG::zigset() ...@@ -96,7 +96,7 @@ void RNG::zigset()
wn[0] = q / m1; wn[127] = tn / m1; wn[0] = q / m1; wn[127] = tn / m1;
fn[0]=1.; fn[127] = exp(-.5 * tn * tn); fn[0]=1.; fn[127] = exp(-.5 * tn * tn);
for (uint i = 126; i > 0; i--) { for (uint i = 126; i > 0; i--) {
const double dn = sqrt(-2 * log(vn / tn + exp(-.5 * tn * tn))); const double dn = sqrtf(-2 * log(vn / tn + exp(-.5 * tn * tn)));
kn[i + 1] = ULONG32((dn / tn) * m1); kn[i + 1] = ULONG32((dn / tn) * m1);
fn[i] = exp(-.5 * dn * dn); fn[i] = exp(-.5 * dn * dn);
wn[i] = dn / m1; wn[i] = dn / m1;
...@@ -130,7 +130,7 @@ double RNG::gamma(double shape, double scale) ...@@ -130,7 +130,7 @@ double RNG::gamma(double shape, double scale)
return gamma(shape + 1., scale) * pow(rand_open01(), 1.0 / shape); return gamma(shape + 1., scale) * pow(rand_open01(), 1.0 / shape);
const double d = shape - 1. / 3.; const double d = shape - 1. / 3.;
const double c = 1. / sqrt(9. * d); const double c = 1. / sqrtf(9. * d);
double x, v; double x, v;
for (;;) { for (;;) {
do { do {
...@@ -186,7 +186,7 @@ int RNG::poisson(double mu) ...@@ -186,7 +186,7 @@ int RNG::poisson(double mu)
if (big_mu) { if (big_mu) {
new_big_mu = 1; new_big_mu = 1;
muprev = mu; muprev = mu;
s = sqrt(mu); s = sqrtf(mu);
d = 6. * mu * mu; d = 6. * mu * mu;
big_l = floor(mu - 1.1484); big_l = floor(mu - 1.1484);
} }
...@@ -240,7 +240,7 @@ int RNG::poisson(double mu) ...@@ -240,7 +240,7 @@ int RNG::poisson(double mu)
if (new_big_mu || mu != muprev2) { if (new_big_mu || mu != muprev2) {
muprev2 = mu; muprev2 = mu;
omega = 1.0 / (sqrt(2.0 * PI) * s); omega = 1.0 / (sqrtf(2.0 * PI) * s);
b1 = one_24 / mu; b1 = one_24 / mu;
b2 = 0.3 * b1 * b1; b2 = 0.3 * b1 * b1;
c3 = one_7 * b1 * b2; c3 = one_7 * b1 * b2;
...@@ -280,7 +280,7 @@ Step_F: ...@@ -280,7 +280,7 @@ Step_F:
- del; - del;
else else
px = fk * log(1. + v) - difmuk - del; px = fk * log(1. + v) - difmuk - del;
py = 1.0 / (sqrt(2.0 * PI) * sqrt(fk)); py = 1.0 / (sqrtf(2.0 * PI) * sqrtf(fk));
} }
x = (0.5 - difmuk) / s; x = (0.5 - difmuk) / s;
x *= x;/* x^2 */ x *= x;/* x^2 */
...@@ -332,7 +332,7 @@ int RNG::binomial(double pp, int n) ...@@ -332,7 +332,7 @@ int RNG::binomial(double pp, int n)
m = (int) ffm; m = (int) ffm;
fm = m; fm = m;
npq = np * q; npq = np * q;
p1 = (int)(2.195 * sqrt(npq) - 4.6 * q) + 0.5; p1 = (int)(2.195 * sqrtf(npq) - 4.6 * q) + 0.5;
xm = fm + 0.5; xm = fm + 0.5;
xl = xm - p1; xl = xm - p1;
xr = xm + p1; xr = xm + p1;
......
...@@ -156,7 +156,7 @@ __device__ void calculateFixedGkFieldPairIxn_kernel( float4 atomCoordinatesI, ...@@ -156,7 +156,7 @@ __device__ void calculateFixedGkFieldPairIxn_kernel( float4 atomCoordinatesI,
qyzk = labFrameQuadrupoleJ[5]; qyzk = labFrameQuadrupoleJ[5];
qzzk = labFrameQuadrupoleJ[8]; qzzk = labFrameQuadrupoleJ[8];
expterm = exp(-r2/(gkc*rb2)); expterm = expf(-r2/(gkc*rb2));
expc = expterm / gkc; expc = expterm / gkc;
dexpc = -2.0f / (gkc*rb2); dexpc = -2.0f / (gkc*rb2);
gf2 = 1.0f / (r2+rb2*expterm); gf2 = 1.0f / (r2+rb2*expterm);
......
...@@ -105,7 +105,7 @@ __device__ void calculateGrycukBornRadiiPairIxn_kernel( GrycukParticle& atomI, G ...@@ -105,7 +105,7 @@ __device__ void calculateGrycukBornRadiiPairIxn_kernel( GrycukParticle& atomI, G
zr = atomJ.z - atomI.z; zr = atomJ.z - atomI.z;
r2 = xr*xr + yr*yr + zr*zr; r2 = xr*xr + yr*yr + zr*zr;
r = sqrt(r2); r = sqrtf(r2);
sk = atomJ.scaledRadius; sk = atomJ.scaledRadius;
sk2 = sk*sk; sk2 = sk*sk;
...@@ -170,7 +170,7 @@ void kReduceGrycukGbsaBornSum_kernel() ...@@ -170,7 +170,7 @@ void kReduceGrycukGbsaBornSum_kernel()
float radius = cSim.pObcData[pos].x; float radius = cSim.pObcData[pos].x;
radius = 1.0f/(radius*radius*radius); radius = 1.0f/(radius*radius*radius);
sum = radius - sum; sum = radius - sum;
sum = sum <= 0.0f ? 1000.0f : pow( sum, -1.0f/3.0f ); sum = sum <= 0.0f ? 1000.0f : powf( sum, -1.0f/3.0f );
cSim.pBornRadii[pos] = sum; cSim.pBornRadii[pos] = sum;
pos += gridDim.x * blockDim.x; pos += gridDim.x * blockDim.x;
...@@ -305,9 +305,9 @@ __device__ void calculateGrycukChainRulePairIxn_kernel( GrycukChainRuleParticle& ...@@ -305,9 +305,9 @@ __device__ void calculateGrycukChainRulePairIxn_kernel( GrycukChainRuleParticle&
float lik, uik; float lik, uik;
float lik4, uik4; float lik4, uik4;
float factor = -pow(pi,third)*pow(6.0f,(2.0f*third))/9.0f; float factor = -powf(pi,third)*powf(6.0f,(2.0f*third))/9.0f;
float term = pi43/(atomI.bornRadius*atomI.bornRadius*atomI.bornRadius); float term = pi43/(atomI.bornRadius*atomI.bornRadius*atomI.bornRadius);
term = factor/pow( term, (4.0f*third) ); term = factor/powf( term, (4.0f*third) );
float xr = atomJ.x - atomI.x; float xr = atomJ.x - atomI.x;
float yr = atomJ.y - atomI.y; float yr = atomJ.y - atomI.y;
...@@ -316,7 +316,7 @@ __device__ void calculateGrycukChainRulePairIxn_kernel( GrycukChainRuleParticle& ...@@ -316,7 +316,7 @@ __device__ void calculateGrycukChainRulePairIxn_kernel( GrycukChainRuleParticle&
float sk = atomJ.scaledRadius; float sk = atomJ.scaledRadius;
float sk2 = sk*sk; float sk2 = sk*sk;
float r2 = xr*xr + yr*yr + zr*zr; float r2 = xr*xr + yr*yr + zr*zr;
float r = sqrt(r2); float r = sqrtf(r2);
float de = 0.0f; float de = 0.0f;
if( (atomI.radius + r) < sk ){ if( (atomI.radius + r) < sk ){
......
...@@ -172,7 +172,7 @@ __device__ void calculateKirkwoodPairIxnOrig_kernel( KirkwoodParticle& atomI, ...@@ -172,7 +172,7 @@ __device__ void calculateKirkwoodPairIxnOrig_kernel( KirkwoodParticle& atomI,
dexpcr = 2.0f / (cAmoebaSim.gkc*rb2*rb2); dexpcr = 2.0f / (cAmoebaSim.gkc*rb2*rb2);
dgfdr = 0.5f * expterm * (1.0f+r2/(rb2*cAmoebaSim.gkc)); dgfdr = 0.5f * expterm * (1.0f+r2/(rb2*cAmoebaSim.gkc));
gf2 = 1.0f / (r2+rb2*expterm); gf2 = 1.0f / (r2+rb2*expterm);
gf = sqrt(gf2); gf = sqrtf(gf2);
gf3 = gf2 * gf; gf3 = gf2 * gf;
gf5 = gf3 * gf2; gf5 = gf3 * gf2;
gf7 = gf5 * gf2; gf7 = gf5 * gf2;
......
...@@ -186,9 +186,9 @@ __device__ void calculateKirkwoodEDiffPairIxnOrig_kernel( KirkwoodEDiffParticle& ...@@ -186,9 +186,9 @@ __device__ void calculateKirkwoodEDiffPairIxnOrig_kernel( KirkwoodEDiffParticle&
scale7 = 1.0f - (1.0f - damp + 0.6f*damp2)*dampE; scale7 = 1.0f - (1.0f - damp + 0.6f*damp2)*dampE;
//scale9 = 1.0f - (1.0f - damp + (18.0f*damp2 - (9.0f*damp*damp2))/35.0f)*dampE; //scale9 = 1.0f - (1.0f - damp + (18.0f*damp2 - (9.0f*damp*damp2))/35.0f)*dampE;
ddsc3_1 = -3.0f*damp*exp(damp) * xr/r2; ddsc3_1 = -3.0f*damp*expf(damp) * xr/r2;
ddsc3_2 = -3.0f*damp*exp(damp) * yr/r2; ddsc3_2 = -3.0f*damp*expf(damp) * yr/r2;
ddsc3_3 = -3.0f*damp*exp(damp) * zr/r2; ddsc3_3 = -3.0f*damp*expf(damp) * zr/r2;
ddsc5_1 = -damp * ddsc3_1; ddsc5_1 = -damp * ddsc3_1;
ddsc5_2 = -damp * ddsc3_2; ddsc5_2 = -damp * ddsc3_2;
......
...@@ -69,9 +69,9 @@ __device__ void SUB_METHOD_NAME( calculateKirkwoodEDiffPairIxn, _kernel)( Kirkwo ...@@ -69,9 +69,9 @@ __device__ void SUB_METHOD_NAME( calculateKirkwoodEDiffPairIxn, _kernel)( Kirkwo
scale7 = 1.0f - (1.0f - damp + 0.6f*damp2)*dampE; scale7 = 1.0f - (1.0f - damp + 0.6f*damp2)*dampE;
#ifdef F1 #ifdef F1
ddsc3_1 = -3.0f*damp*exp(damp)*xr*rr2*rr3; ddsc3_1 = -3.0f*damp*expf(damp)*xr*rr2*rr3;
ddsc3_2 = -3.0f*damp*exp(damp)*yr*rr2*rr3; ddsc3_2 = -3.0f*damp*expf(damp)*yr*rr2*rr3;
ddsc3_3 = -3.0f*damp*exp(damp)*zr*rr2*rr3; ddsc3_3 = -3.0f*damp*expf(damp)*zr*rr2*rr3;
ddsc5_1 = -3.0f*damp*ddsc3_1*rr2; ddsc5_1 = -3.0f*damp*ddsc3_1*rr2;
ddsc5_2 = -3.0f*damp*ddsc3_2*rr2; ddsc5_2 = -3.0f*damp*ddsc3_2*rr2;
......
...@@ -51,7 +51,7 @@ static __device__ void SUB_METHOD_NAME( calculateKirkwoodPairIxn, _kernel)( Kirk ...@@ -51,7 +51,7 @@ static __device__ void SUB_METHOD_NAME( calculateKirkwoodPairIxn, _kernel)( Kirk
float dgfdr = 0.5f*expterm*(1.0f + r2/(rb2*cAmoebaSim.gkc)); float dgfdr = 0.5f*expterm*(1.0f + r2/(rb2*cAmoebaSim.gkc));
float gf2 = 1.0f / (r2 + rb2*expterm); float gf2 = 1.0f / (r2 + rb2*expterm);
float gf = sqrt(gf2); float gf = sqrtf(gf2);
float gf3 = gf2*gf; float gf3 = gf2*gf;
float gf5 = gf3*gf2; float gf5 = gf3*gf2;
float gf7 = gf5*gf2; float gf7 = gf5*gf2;
......
...@@ -320,7 +320,7 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -320,7 +320,7 @@ void kCalculateAmoebaLocalForces_kernel()
float dy = atomB.y - atomA.y; float dy = atomB.y - atomA.y;
float dz = atomB.z - atomA.z; float dz = atomB.z - atomA.z;
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2); float r = sqrtf(r2);
float deltaIdeal = r - bond.x; float deltaIdeal = r - bond.x;
#if defined OLD #if defined OLD
energy += 0.5f * bond.y * deltaIdeal * deltaIdeal; energy += 0.5f * bond.y * deltaIdeal * deltaIdeal;
...@@ -382,16 +382,16 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -382,16 +382,16 @@ void kCalculateAmoebaLocalForces_kernel()
float3 cp; float3 cp;
CROSS_PRODUCT(v0, v1, cp); CROSS_PRODUCT(v0, v1, cp);
float rp = DOT3(cp, cp); //cx * cx + cy * cy + cz * cz; float rp = DOT3(cp, cp); //cx * cx + cy * cy + cz * cz;
rp = max(sqrt(rp), 1.0e-06f); rp = max(sqrtf(rp), 1.0e-06f);
float r21 = DOT3(v0, v0); // dx1 * dx1 + dy1 * dy1 + dz1 * dz1; float r21 = DOT3(v0, v0); // dx1 * dx1 + dy1 * dy1 + dz1 * dz1;
float r23 = DOT3(v1, v1); // dx2 * dx2 + dy2 * dy2 + dz2 * dz2; float r23 = DOT3(v1, v1); // dx2 * dx2 + dy2 * dy2 + dz2 * dz2;
float dot = DOT3(v0, v1); // dx1 * dx2 + dy1 * dy2 + dz1 * dz2; float dot = DOT3(v0, v1); // dx1 * dx2 + dy1 * dy2 + dz1 * dz2;
float cosine = dot / sqrt(r21 * r23); float cosine = dot / sqrtf(r21 * r23);
// e = angunit * force * dt2 * (1.0d0+cang*dt+qang*dt2+pang*dt3+sang*dt4) // e = angunit * force * dt2 * (1.0d0+cang*dt+qang*dt2+pang*dt3+sang*dt4)
// deddt = angunit * force * dt * radian * (2.0d0 + 3.0d0*cang*dt + 4.0d0*qang*dt2 + 5.0d0*pang*dt3 + 6.0d0*sang*dt4) // deddt = angunit * force * dt * radian * (2.0d0 + 3.0d0*cang*dt + 4.0d0*qang*dt2 + 5.0d0*pang*dt3 + 6.0d0*sang*dt4)
float angle = acos(cosine); float angle = acosf(cosine);
float deltaIdeal = angle*(180.0f/LOCAL_HACK_PI) - bond_angle1.x; float deltaIdeal = angle*(180.0f/LOCAL_HACK_PI) - bond_angle1.x;
float deltaIdeal2 = deltaIdeal *deltaIdeal; float deltaIdeal2 = deltaIdeal *deltaIdeal;
float deltaIdeal3 = deltaIdeal *deltaIdeal2; float deltaIdeal3 = deltaIdeal *deltaIdeal2;
...@@ -507,7 +507,7 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -507,7 +507,7 @@ void kCalculateAmoebaLocalForces_kernel()
float cosine = product > 0.0f ? (dot/product) : 0.0f; float cosine = product > 0.0f ? (dot/product) : 0.0f;
cosine = cosine > 1.0f ? 1.0f : cosine; cosine = cosine > 1.0f ? 1.0f : cosine;
cosine = cosine < -1.0f ? -1.0f : cosine; cosine = cosine < -1.0f ? -1.0f : cosine;
float angle = acos(cosine); float angle = acosf(cosine);
// if product == 0, set force/energy to 0 // if product == 0, set force/energy to 0
...@@ -670,19 +670,19 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -670,19 +670,19 @@ void kCalculateAmoebaLocalForces_kernel()
float v1 = torsionParam1.x; float v1 = torsionParam1.x;
float angle = torsionParam1.y; float angle = torsionParam1.y;
float c1 = cos( angle ); float c1 = cosf( angle );
float s1 = sin( angle ); float s1 = sinf( angle );
float v2 = torsionParam1.z; float v2 = torsionParam1.z;
angle = torsionParam1.w; angle = torsionParam1.w;
float c2 = cos( angle ); float c2 = cosf( angle );
float s2 = sin( angle ); float s2 = sinf( angle );
float v3 = torsionParam2.x; float v3 = torsionParam2.x;
angle = torsionParam2.y; angle = torsionParam2.y;
float c3 = cos( angle ); float c3 = cosf( angle );
float s3 = sin( angle ); float s3 = sinf( angle );
// compute the multiple angle trigonometry and the phase terms // compute the multiple angle trigonometry and the phase terms
...@@ -857,7 +857,7 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -857,7 +857,7 @@ void kCalculateAmoebaLocalForces_kernel()
float ru2 = xu*xu + yu*yu + zu*zu; float ru2 = xu*xu + yu*yu + zu*zu;
float rtru = sqrtf(rt2 * ru2); float rtru = sqrtf(rt2 * ru2);
float rdc = sqrt(xdc*xdc + ydc*ydc + zdc*zdc); float rdc = sqrtf(xdc*xdc + ydc*ydc + zdc*zdc);
float cosine = rtru > 0.0f ? (xt*xu + yt*yu + zt*zu) / rtru : 0.0f; float cosine = rtru > 0.0f ? (xt*xu + yt*yu + zt*zu) / rtru : 0.0f;
float sine = (rtru*rdc) > 0.0f ? (xdc*xtu + ydc*ytu + zdc*ztu) / (rdc*rtru) : 0.0f; float sine = (rtru*rdc) > 0.0f ? (xdc*xtu + ydc*ytu + zdc*ztu) / (rdc*rtru) : 0.0f;
...@@ -1020,13 +1020,13 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -1020,13 +1020,13 @@ void kCalculateAmoebaLocalForces_kernel()
float yp = zcb*xab - xcb*zab; float yp = zcb*xab - xcb*zab;
float zp = xcb*yab - ycb*xab; float zp = xcb*yab - ycb*xab;
float rp = sqrt(xp*xp + yp*yp + zp*zp); float rp = sqrtf(xp*xp + yp*yp + zp*zp);
float dot = xab*xcb + yab*ycb + zab*zcb; float dot = xab*xcb + yab*ycb + zab*zcb;
float cosine = rab*rcb > 0.0f ? (dot / (rab*rcb)) : 1.0f; float cosine = rab*rcb > 0.0f ? (dot / (rab*rcb)) : 1.0f;
cosine = cosine > 1.0f ? 1.0f : cosine; cosine = cosine > 1.0f ? 1.0f : cosine;
cosine = cosine < -1.0f ? -1.0f : cosine; cosine = cosine < -1.0f ? -1.0f : cosine;
float angle = acos(cosine); float angle = acosf(cosine);
// find chain rule terms for the bond angle deviation // find chain rule terms for the bond angle deviation
...@@ -1158,9 +1158,9 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -1158,9 +1158,9 @@ void kCalculateAmoebaLocalForces_kernel()
float adXcd_nrm2 = adXcd_0*adXcd_0 + adXcd_1*adXcd_1 + adXcd_2*adXcd_2; float adXcd_nrm2 = adXcd_0*adXcd_0 + adXcd_1*adXcd_1 + adXcd_2*adXcd_2;
float adXcd_dot_db = xdb*adXcd_0 + ydb*adXcd_1 + zdb*adXcd_2; float adXcd_dot_db = xdb*adXcd_0 + ydb*adXcd_1 + zdb*adXcd_2;
adXcd_dot_db /= sqrt(rdb2*adXcd_nrm2); adXcd_dot_db /= sqrtf(rdb2*adXcd_nrm2);
float angle = abs( asin(adXcd_dot_db) ); float angle = abs( asinf(adXcd_dot_db) );
// find the out-of-plane energy and master chain rule terms // find the out-of-plane energy and master chain rule terms
...@@ -1327,7 +1327,7 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -1327,7 +1327,7 @@ void kCalculateAmoebaLocalForces_kernel()
cosine1 = cosine1 > 1.0f ? 1.0f : cosine1; cosine1 = cosine1 > 1.0f ? 1.0f : cosine1;
cosine1 = cosine1 < -1.0f ? -1.0f : cosine1; cosine1 = cosine1 < -1.0f ? -1.0f : cosine1;
float angle1 = LOCAL_HACK_RADIAN * acos(cosine1); float angle1 = LOCAL_HACK_RADIAN * acosf(cosine1);
float sign = xba*xu + yba*yu + zba*zu; float sign = xba*xu + yba*yu + zba*zu;
angle1 = sign < 0.0f ? -angle1 : angle1; angle1 = sign < 0.0f ? -angle1 : angle1;
float value1 = angle1; float value1 = angle1;
...@@ -1336,7 +1336,7 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -1336,7 +1336,7 @@ void kCalculateAmoebaLocalForces_kernel()
float cosine2 = (xu*xv + yu*yv + zu*zv) / rurv; float cosine2 = (xu*xv + yu*yv + zu*zv) / rurv;
cosine2 = cosine2 > 1.0f ? 1.0f : cosine2; cosine2 = cosine2 > 1.0f ? 1.0f : cosine2;
cosine2 = cosine2 < -1.0f ? -1.0f : cosine2; cosine2 = cosine2 < -1.0f ? -1.0f : cosine2;
float angle2 = LOCAL_HACK_RADIAN * acos(cosine2); float angle2 = LOCAL_HACK_RADIAN * acosf(cosine2);
sign = xcb*xv + ycb*yv + zcb*zv; sign = xcb*xv + ycb*yv + zcb*zv;
angle2 = sign < 0.0f ? -angle2 : angle2; angle2 = sign < 0.0f ? -angle2 : angle2;
...@@ -1577,7 +1577,7 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -1577,7 +1577,7 @@ void kCalculateAmoebaLocalForces_kernel()
float dz = atomB.z - atomA.z; float dz = atomB.z - atomA.z;
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2); float r = sqrtf(r2);
float deltaIdeal = r - bond.x; float deltaIdeal = r - bond.x;
float deltaIdeal2 = deltaIdeal*deltaIdeal; float deltaIdeal2 = deltaIdeal*deltaIdeal;
energy += bond.y * deltaIdeal2*( 1.0f + cAmoebaSim.amoebaUreyBradleyCubicParameter*deltaIdeal + energy += bond.y * deltaIdeal2*( 1.0f + cAmoebaSim.amoebaUreyBradleyCubicParameter*deltaIdeal +
......
...@@ -137,9 +137,9 @@ void kComputeAmoebaBsplines_kernel() ...@@ -137,9 +137,9 @@ void kComputeAmoebaBsplines_kernel()
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) {
float4 posq = cSim.pPosq[i]; float4 posq = cSim.pPosq[i];
posq.x -= floor(posq.x*cSim.invPeriodicBoxSizeX)*cSim.periodicBoxSizeX; posq.x -= floorf(posq.x*cSim.invPeriodicBoxSizeX)*cSim.periodicBoxSizeX;
posq.y -= floor(posq.y*cSim.invPeriodicBoxSizeY)*cSim.periodicBoxSizeY; posq.y -= floorf(posq.y*cSim.invPeriodicBoxSizeY)*cSim.periodicBoxSizeY;
posq.z -= floor(posq.z*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ; posq.z -= floorf(posq.z*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
// First axis. // First axis.
...@@ -210,7 +210,7 @@ void kFindAmoebaAtomRangeForGrid_kernel() ...@@ -210,7 +210,7 @@ void kFindAmoebaAtomRangeForGrid_kernel()
// some work in the charge spreading kernel. // some work in the charge spreading kernel.
float posz = cSim.pPosq[atomData.x].z; float posz = cSim.pPosq[atomData.x].z;
posz -= floor(posz*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ; posz -= floorf(posz*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
float w = posz*cSim.invPeriodicBoxSizeZ; float w = posz*cSim.invPeriodicBoxSizeZ;
float fr = cSim.pmeGridSize.z*(w-(int)(w+0.5f)+0.5f); float fr = cSim.pmeGridSize.z*(w-(int)(w+0.5f)+0.5f);
int z = ((int) fr)-AMOEBA_PME_ORDER+1; int z = ((int) fr)-AMOEBA_PME_ORDER+1;
...@@ -449,7 +449,7 @@ void kAmoebaReciprocalConvolution_kernel() ...@@ -449,7 +449,7 @@ void kAmoebaReciprocalConvolution_kernel()
cuComplex grid = cSim.pPmeGrid[index]; cuComplex grid = cSim.pPmeGrid[index];
float m2 = mhx*mhx+mhy*mhy+mhz*mhz; float m2 = mhx*mhx+mhy*mhy+mhz*mhz;
float denom = m2*bx*by*bz; float denom = m2*bx*by*bz;
float eterm = scaleFactor*exp(-expFactor*m2)/denom; float eterm = scaleFactor*expf(-expFactor*m2)/denom;
cSim.pPmeGrid[index] = make_cuComplex(grid.x*eterm, grid.y*eterm); cSim.pPmeGrid[index] = make_cuComplex(grid.x*eterm, grid.y*eterm);
} }
} }
......
...@@ -192,10 +192,10 @@ __device__ void calculateBn_kernel( float r, float4* bn, float* bn0, float *bn5 ...@@ -192,10 +192,10 @@ __device__ void calculateBn_kernel( float r, float4* bn, float* bn0, float *bn5
if( cSim.alphaEwald > 0.0f ){ if( cSim.alphaEwald > 0.0f ){
alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald); alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
} }
float exp2a = exp(-(ralpha*ralpha)); float exp2a = expf(-(ralpha*ralpha));
float rr1 = 1.0f/r; float rr1 = 1.0f/r;
*bn0 = erfc(ralpha)*rr1; *bn0 = erfcf(ralpha)*rr1;
float rr2 = rr1*rr1; float rr2 = rr1*rr1;
alsq2n *= alsq2; alsq2n *= alsq2;
...@@ -241,14 +241,14 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire ...@@ -241,14 +241,14 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
// periodic box // periodic box
xr -= floor(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; xr -= floorf(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
yr -= floor(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; yr -= floorf(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
zr -= floor(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; zr -= floorf(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
float r2 = xr*xr + yr*yr + zr*zr; float r2 = xr*xr + yr*yr + zr*zr;
if( r2 <= cSim.nonbondedCutoffSqr ){ if( r2 <= cSim.nonbondedCutoffSqr ){
float r = sqrt(r2); float r = sqrtf(r2);
float ck = atomJ.q; float ck = atomJ.q;
// set the permanent multipole and induced dipole values; // set the permanent multipole and induced dipole values;
...@@ -282,14 +282,14 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire ...@@ -282,14 +282,14 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
// calculate the real space error function terms // calculate the real space error function terms
float ralpha = cSim.alphaEwald*r; float ralpha = cSim.alphaEwald*r;
float bn0 = erfc(ralpha)/r; float bn0 = erfcf(ralpha)/r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
float alsq2n = 0.0f; float alsq2n = 0.0f;
if( cSim.alphaEwald > 0.0f ){ if( cSim.alphaEwald > 0.0f ){
alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald); alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
} }
float exp2a = exp(-(ralpha*ralpha)); float exp2a = expf(-(ralpha*ralpha));
alsq2n *= alsq2; alsq2n *= alsq2;
float bn1 = (bn0+alsq2n*exp2a)/r2; float bn1 = (bn0+alsq2n*exp2a)/r2;
...@@ -336,7 +336,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire ...@@ -336,7 +336,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( const PmeDire
float ratio = r/damp; float ratio = r/damp;
damp = -pgamma*ratio*ratio*ratio; damp = -pgamma*ratio*ratio*ratio;
if( damp > -50.0f ){ if( damp > -50.0f ){
float expdamp = exp(damp); float expdamp = expf(damp);
scale3 = 1.0f - expdamp; scale3 = 1.0f - expdamp;
scale5 = 1.0f - (1.0f-damp)*expdamp; scale5 = 1.0f - (1.0f-damp)*expdamp;
scale7 = 1.0f - (1.0f-damp+0.6f*damp*damp)*expdamp; scale7 = 1.0f - (1.0f-damp+0.6f*damp*damp)*expdamp;
...@@ -957,16 +957,16 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros ...@@ -957,16 +957,16 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// periodic box // periodic box
delta.x -= floor(delta.x*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; delta.x -= floorf(delta.x*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
delta.y -= floor(delta.y*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; delta.y -= floorf(delta.y*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
delta.z -= floor(delta.z*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; delta.z -= floorf(delta.z*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
delta.w = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; delta.w = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if( delta.w > cSim.nonbondedCutoffSqr ){ if( delta.w > cSim.nonbondedCutoffSqr ){
return; return;
} }
float r = sqrt(delta.w); float r = sqrtf(delta.w);
float ralpha = cSim.alphaEwald*r; float ralpha = cSim.alphaEwald*r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
...@@ -974,11 +974,11 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros ...@@ -974,11 +974,11 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
if( cSim.alphaEwald > 0.0f ){ if( cSim.alphaEwald > 0.0f ){
alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald); alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
} }
float exp2a = exp(-(ralpha*ralpha)); float exp2a = expf(-(ralpha*ralpha));
float rr1 = 1.0f/r; float rr1 = 1.0f/r;
delta.w = rr1; delta.w = rr1;
float bn0 = erfc(ralpha)*rr1; float bn0 = erfcf(ralpha)*rr1;
*energy += forceFactor*atomI.q*atomJ.q*bn0; *energy += forceFactor*atomI.q*atomJ.q*bn0;
float rr2 = rr1*rr1; float rr2 = rr1*rr1;
alsq2n *= alsq2; alsq2n *= alsq2;
...@@ -1058,16 +1058,16 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrigg_kernel( PmeDirectEle ...@@ -1058,16 +1058,16 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrigg_kernel( PmeDirectEle
// periodic box // periodic box
delta.x -= floor(delta.x*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; delta.x -= floorf(delta.x*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
delta.y -= floor(delta.y*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; delta.y -= floorf(delta.y*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
delta.z -= floor(delta.z*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; delta.z -= floorf(delta.z*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
delta.w = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; delta.w = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if( delta.w > cSim.nonbondedCutoffSqr ){ if( delta.w > cSim.nonbondedCutoffSqr ){
return; return;
} }
float r = sqrt(delta.w); float r = sqrtf(delta.w);
float ralpha = cSim.alphaEwald*r; float ralpha = cSim.alphaEwald*r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
...@@ -1075,11 +1075,11 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrigg_kernel( PmeDirectEle ...@@ -1075,11 +1075,11 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrigg_kernel( PmeDirectEle
if( cSim.alphaEwald > 0.0f ){ if( cSim.alphaEwald > 0.0f ){
alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald); alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
} }
float exp2a = exp(-(ralpha*ralpha)); float exp2a = expf(-(ralpha*ralpha));
float rr1 = 1.0f/r; float rr1 = 1.0f/r;
delta.w = rr1; delta.w = rr1;
float bn0 = erfc(ralpha)*rr1; float bn0 = erfcf(ralpha)*rr1;
*energy += forceFactor*atomI.q*atomJ.q*bn0; *energy += forceFactor*atomI.q*atomJ.q*bn0;
float rr2 = rr1*rr1; float rr2 = rr1*rr1;
alsq2n *= alsq2; alsq2n *= alsq2;
......
...@@ -41,7 +41,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2 ...@@ -41,7 +41,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2
damp = damp < -50.0f ? 0.0f : damp; damp = damp < -50.0f ? 0.0f : damp;
} }
float scale5 = (damp == 0.0f) ? 1.0f : (1.0f - (1.0f-damp)*exp(damp)); float scale5 = (damp == 0.0f) ? 1.0f : (1.0f - (1.0f-damp)*expf(damp));
float rr5 = rr1*rr1; float rr5 = rr1*rr1;
rr5 = 3.0f*rr1*rr5*rr5; rr5 = 3.0f*rr1*rr5*rr5;
#ifdef APPLY_SCALE #ifdef APPLY_SCALE
...@@ -79,7 +79,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2 ...@@ -79,7 +79,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2
ftm23 += (qiuk3 + qiukp3)*psc5; ftm23 += (qiuk3 + qiukp3)*psc5;
#endif #endif
float expdamp = exp(damp); float expdamp = expf(damp);
float scale3 = (damp == 0.0f) ? 1.0f : (1.0f - expdamp); float scale3 = (damp == 0.0f) ? 1.0f : (1.0f - expdamp);
float rr3 = rr1*rr1*rr1; float rr3 = rr1*rr1*rr1;
...@@ -190,7 +190,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2 ...@@ -190,7 +190,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2
if( damp != 0.0f ){ if( damp != 0.0f ){
float expdamp = exp(damp); float expdamp = expf(damp);
float temp3 = -1.5f*damp*expdamp*rr1*rr1; float temp3 = -1.5f*damp*expdamp*rr1*rr1;
float temp5 = -damp; float temp5 = -damp;
float temp7 = -0.2f - 0.6f*damp; float temp7 = -0.2f - 0.6f*damp;
...@@ -395,7 +395,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2 ...@@ -395,7 +395,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnF2
if( damp != 0.0f ){ if( damp != 0.0f ){
float expdamp = exp(damp); float expdamp = expf(damp);
float temp3 = -1.5f*damp*expdamp*rr1*rr1; float temp3 = -1.5f*damp*expdamp*rr1*rr1;
float temp5 = -damp; float temp5 = -damp;
float temp7 = -0.2f - 0.6f*damp; float temp7 = -0.2f - 0.6f*damp;
......
...@@ -42,7 +42,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnT2 ...@@ -42,7 +42,7 @@ static __device__ void SUB_METHOD_NAME( calculatePmeDirectElectrostaticPairIxnT2
float ratio = 1.0f/(rr1*damp); float ratio = 1.0f/(rr1*damp);
damp = -pgamma*ratio*ratio*ratio; damp = -pgamma*ratio*ratio*ratio;
if( damp > -50.0f ){ if( damp > -50.0f ){
float expdamp = exp(damp); float expdamp = expf(damp);
scale3 = 1.0f - expdamp; scale3 = 1.0f - expdamp;
scale5 = 1.0f - (1.0f-damp)*expdamp; scale5 = 1.0f - (1.0f-damp)*expdamp;
scale7 = 1.0f - (1.0f-damp+0.6f*damp*damp)*expdamp; scale7 = 1.0f - (1.0f-damp+0.6f*damp*damp)*expdamp;
......
...@@ -201,9 +201,9 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& ...@@ -201,9 +201,9 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
// periodic boundary conditions // periodic boundary conditions
xr -= floor(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; xr -= floorf(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
yr -= floor(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; yr -= floorf(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
zr -= floor(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; zr -= floorf(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
float r2 = xr*xr + yr*yr + zr*zr; float r2 = xr*xr + yr*yr + zr*zr;
if( r2 <= cSim.nonbondedCutoffSqr ){ if( r2 <= cSim.nonbondedCutoffSqr ){
...@@ -214,10 +214,10 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& ...@@ -214,10 +214,10 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
float ralpha = cSim.alphaEwald*r; float ralpha = cSim.alphaEwald*r;
float bn0 = erfc(ralpha)/r; float bn0 = erfcf(ralpha)/r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald); float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
float exp2a = exp(-(ralpha*ralpha)); float exp2a = expf(-(ralpha*ralpha));
alsq2n *= alsq2; alsq2n *= alsq2;
float bn1 = (bn0+alsq2n*exp2a)/r2; float bn1 = (bn0+alsq2n*exp2a)/r2;
...@@ -243,7 +243,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& ...@@ -243,7 +243,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
damp = -pgamma*ratio; damp = -pgamma*ratio;
if( damp > -50.0f) { if( damp > -50.0f) {
float expdamp = exp(damp); float expdamp = expf(damp);
scale3 = 1.0f - expdamp; scale3 = 1.0f - expdamp;
scale5 = 1.0f - expdamp*(1.0f-damp); scale5 = 1.0f - expdamp*(1.0f-damp);
scale7 = 1.0f - expdamp*(1.0f-damp+(0.6f*damp*damp)); scale7 = 1.0f - expdamp*(1.0f-damp+(0.6f*damp*damp));
......
...@@ -85,9 +85,9 @@ __device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedPartic ...@@ -85,9 +85,9 @@ __device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedPartic
// pdelta->xiodic boundary conditions // pdelta->xiodic boundary conditions
delta->x -= floor(delta->x*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; delta->x -= floorf(delta->x*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
delta->y -= floor(delta->y*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; delta->y -= floorf(delta->y*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
delta->z -= floor(delta->z*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; delta->z -= floorf(delta->z*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
float r2 = (delta->x*delta->x) + (delta->y*delta->y) + (delta->z*delta->z); float r2 = (delta->x*delta->x) + (delta->y*delta->y) + (delta->z*delta->z);
if( r2 <= cSim.nonbondedCutoffSqr ){ if( r2 <= cSim.nonbondedCutoffSqr ){
...@@ -97,10 +97,10 @@ __device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedPartic ...@@ -97,10 +97,10 @@ __device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedPartic
float ralpha = cSim.alphaEwald*r; float ralpha = cSim.alphaEwald*r;
float bn0 = erfc(ralpha)/r; float bn0 = erfcf(ralpha)/r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald); float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
float exp2a = exp(-(ralpha*ralpha)); float exp2a = expf(-(ralpha*ralpha));
alsq2n *= alsq2; alsq2n *= alsq2;
float bn1 = (bn0+alsq2n*exp2a)/r2; float bn1 = (bn0+alsq2n*exp2a)/r2;
...@@ -120,7 +120,7 @@ __device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedPartic ...@@ -120,7 +120,7 @@ __device__ void setupMutualInducedFieldPairIxn_kernel( const MutualInducedPartic
damp = -pgamma*ratio; damp = -pgamma*ratio;
if( damp > -50.0f) { if( damp > -50.0f) {
float expdamp = exp(damp); float expdamp = expf(damp);
scale3 = 1.0f - expdamp; scale3 = 1.0f - expdamp;
scale5 = 1.0f - expdamp*(1.0f-damp); scale5 = 1.0f - expdamp*(1.0f-damp);
} }
...@@ -171,9 +171,9 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce ...@@ -171,9 +171,9 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
// periodic boundary conditions // periodic boundary conditions
xr -= floor(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; xr -= floorf(xr*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
yr -= floor(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; yr -= floorf(yr*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
zr -= floor(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; zr -= floorf(zr*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
float r2 = xr*xr + yr* yr + zr*zr; float r2 = xr*xr + yr* yr + zr*zr;
if( r2 <= cSim.nonbondedCutoffSqr ){ if( r2 <= cSim.nonbondedCutoffSqr ){
...@@ -183,10 +183,10 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce ...@@ -183,10 +183,10 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
float ralpha = cSim.alphaEwald*r; float ralpha = cSim.alphaEwald*r;
float bn0 = erfc(ralpha)/r; float bn0 = erfcf(ralpha)/r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald); float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
float exp2a = exp(-(ralpha*ralpha)); float exp2a = expf(-(ralpha*ralpha));
alsq2n *= alsq2; alsq2n *= alsq2;
float bn1 = (bn0+alsq2n*exp2a)/r2; float bn1 = (bn0+alsq2n*exp2a)/r2;
...@@ -206,7 +206,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce ...@@ -206,7 +206,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
damp = -pgamma*ratio; damp = -pgamma*ratio;
if( damp > -50.0f) { if( damp > -50.0f) {
float expdamp = exp(damp); float expdamp = expf(damp);
scale3 = 1.0f - expdamp; scale3 = 1.0f - expdamp;
scale5 = 1.0f - expdamp*(1.0f-damp); scale5 = 1.0f - expdamp*(1.0f-damp);
} }
......
...@@ -117,9 +117,9 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)( ...@@ -117,9 +117,9 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
ijForce[2] = psA[j].z - localParticle.z; ijForce[2] = psA[j].z - localParticle.z;
if( cAmoebaSim.vdwUsePBC ) if( cAmoebaSim.vdwUsePBC )
{ {
ijForce[0] -= floor(ijForce[0]*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; ijForce[0] -= floorf(ijForce[0]*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
ijForce[1] -= floor(ijForce[1]*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; ijForce[1] -= floorf(ijForce[1]*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
ijForce[2] -= floor(ijForce[2]*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; ijForce[2] -= floorf(ijForce[2]*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
} }
float energy; float energy;
...@@ -211,9 +211,9 @@ flags = 0xFFFFFFFF; ...@@ -211,9 +211,9 @@ flags = 0xFFFFFFFF;
ijForce[2] = psA[jIdx].z - localParticle.z; ijForce[2] = psA[jIdx].z - localParticle.z;
if( cAmoebaSim.vdwUsePBC ) if( cAmoebaSim.vdwUsePBC )
{ {
ijForce[0] -= floor(ijForce[0]*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; ijForce[0] -= floorf(ijForce[0]*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
ijForce[1] -= floor(ijForce[1]*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; ijForce[1] -= floorf(ijForce[1]*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
ijForce[2] -= floor(ijForce[2]*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; ijForce[2] -= floorf(ijForce[2]*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
} }
calculateVdw14_7PairIxn_kernel( combindedSigma, combindedEpsilon, ijForce, &energy); calculateVdw14_7PairIxn_kernel( combindedSigma, combindedEpsilon, ijForce, &energy);
......
...@@ -47,9 +47,9 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractionsVdw, _kernel)() ...@@ -47,9 +47,9 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractionsVdw, _kernel)()
float dy = centera.y-centerb.y; float dy = centera.y-centerb.y;
float dz = centera.z-centerb.z; float dz = centera.z-centerb.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floorf(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floorf(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floorf(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float4 boxSizea = cSim.pGridBoundingBox[x]; float4 boxSizea = cSim.pGridBoundingBox[x];
float4 boxSizeb = cSim.pGridBoundingBox[y]; float4 boxSizeb = cSim.pGridBoundingBox[y];
...@@ -108,9 +108,9 @@ __global__ void METHOD_NAME(kFindInteractionsWithinBlocksVdw, _kernel)(unsigned ...@@ -108,9 +108,9 @@ __global__ void METHOD_NAME(kFindInteractionsWithinBlocksVdw, _kernel)(unsigned
float dy = apos.y-center.y; float dy = apos.y-center.y;
float dz = apos.z-center.z; float dz = apos.z-center.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floorf(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floorf(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floorf(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
dx = max(0.0f, abs(dx)-boxSize.x); dx = max(0.0f, abs(dx)-boxSize.x);
dy = max(0.0f, abs(dy)-boxSize.y); dy = max(0.0f, abs(dy)-boxSize.y);
......
...@@ -113,12 +113,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -113,12 +113,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float dy = psA[j].y - apos.y; float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z; float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floorf(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floorf(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floorf(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2); float invR = 1.0f / sqrtf(r2);
float sig = a.x + psA[j].sig; float sig = a.x + psA[j].sig;
float eps = a.y * psA[j].eps; float eps = a.y * psA[j].eps;
#ifdef USE_SOFTCORE_LJ #ifdef USE_SOFTCORE_LJ
...@@ -147,9 +147,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -147,9 +147,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
// ObcGbsaForce1 part // ObcGbsaForce1 part
float alpha2_ij = br * psA[j].br; float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij); float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = expf(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm; float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2); float denominator = sqrtf(denominator2);
float Gpol = (q2 * psA[j].q) / (denominator * denominator2); float Gpol = (q2 * psA[j].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij); float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dEdR += Gpol * (1.0f - 0.25f * expTerm); dEdR += Gpol * (1.0f - 0.25f * expTerm);
...@@ -190,12 +190,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -190,12 +190,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float dy = psA[j].y - apos.y; float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z; float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floorf(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floorf(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floorf(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2); float invR = 1.0f / sqrtf(r2);
float sig = a.x + psA[j].sig; float sig = a.x + psA[j].sig;
float eps = a.y * psA[j].eps; float eps = a.y * psA[j].eps;
...@@ -235,9 +235,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -235,9 +235,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float alpha2_ij = br * psA[j].br; float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij); float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = expf(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm; float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2); float denominator = sqrtf(denominator2);
float Gpol = (q2 * psA[j].q) / (denominator * denominator2); float Gpol = (q2 * psA[j].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij); float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dEdR += Gpol * (1.0f - 0.25f * expTerm); dEdR += Gpol * (1.0f - 0.25f * expTerm);
...@@ -333,12 +333,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -333,12 +333,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float dy = psA[tj].y - apos.y; float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z; float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floorf(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floorf(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floorf(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2); float invR = 1.0f / sqrtf(r2);
float sig = a.x + psA[tj].sig; float sig = a.x + psA[tj].sig;
float eps = a.y * psA[tj].eps; float eps = a.y * psA[tj].eps;
...@@ -367,9 +367,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -367,9 +367,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float alpha2_ij = br * psA[tj].br; float alpha2_ij = br * psA[tj].br;
float D_ij = r2 / (4.0f * alpha2_ij); float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = expf(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm; float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2); float denominator = sqrtf(denominator2);
float Gpol = (q2 * psA[tj].q) / (denominator * denominator2); float Gpol = (q2 * psA[tj].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij); float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dEdR += Gpol * (1.0f - 0.25f * expTerm); dEdR += Gpol * (1.0f - 0.25f * expTerm);
...@@ -418,12 +418,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -418,12 +418,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float dy = psA[j].y - apos.y; float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z; float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floorf(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floorf(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floorf(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2); float invR = 1.0f / sqrtf(r2);
float sig = a.x + psA[j].sig; float sig = a.x + psA[j].sig;
float eps = a.y * psA[j].eps; float eps = a.y * psA[j].eps;
#ifdef USE_SOFTCORE_LJ #ifdef USE_SOFTCORE_LJ
...@@ -451,9 +451,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -451,9 +451,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
// ObcGbsaForce1 part // ObcGbsaForce1 part
float alpha2_ij = br * psA[j].br; float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij); float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = expf(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm; float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2); float denominator = sqrtf(denominator2);
float Gpol = (q2 * psA[j].q) / (denominator * denominator2); float Gpol = (q2 * psA[j].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij); float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dEdR += Gpol * (1.0f - 0.25f * expTerm); dEdR += Gpol * (1.0f - 0.25f * expTerm);
...@@ -544,12 +544,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -544,12 +544,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float dy = psA[tj].y - apos.y; float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z; float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floorf(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floorf(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floorf(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2); float invR = 1.0f / sqrtf(r2);
float sig = a.x + psA[tj].sig; float sig = a.x + psA[tj].sig;
float eps = a.y * psA[tj].eps; float eps = a.y * psA[tj].eps;
#ifdef USE_SOFTCORE_LJ #ifdef USE_SOFTCORE_LJ
...@@ -582,9 +582,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -582,9 +582,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
float alpha2_ij = br * psA[tj].br; float alpha2_ij = br * psA[tj].br;
float D_ij = r2 / (4.0f * alpha2_ij); float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij); float expTerm = expf(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm; float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2); float denominator = sqrtf(denominator2);
float Gpol = (q2 * psA[tj].q) / (denominator * denominator2); float Gpol = (q2 * psA[tj].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij); float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
dEdR += Gpol * (1.0f - 0.25f * expTerm); dEdR += Gpol * (1.0f - 0.25f * expTerm);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment