/**************************************************************** * This file is part of the gpu acceleration library for gromacs. * Author: V. Vishal * Copyright (C) Pande Group, Stanford, 2006 *****************************************************************/ /* Stochastic Integrator * * Since we don't have random numbers on the GPU, we * precalculate random numbers from a normal distribution * and store them in several large textures. Periodically * these will have to be refreshed on the CPU. * * Doing gathers instead of streams for the random vectors, * but the gathers should be fully coherent anyway. * This is just to avoid some * painful manipulations with multiple streams and domains. * */ /* First part before first constraint call * * * Many things can be precalculated here * If we become compute bound (wonder how), we can precalc stuff * * Assuming constant temperature. If you want to change the * temperature, you have to change all the precalculated * constants that use it. * */ /* kernel void kupdate_sd1( float xstrwidth, //atom stream width float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers //sd constants, prefixed with c //to avoid 1 letter names, cxx = sdc[0].xx float cem, //sd precalculated constants float pc1, //tau_t[0] * ( 1 - sdc[0].em ) float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh) float pc3, //sdc[0].d/(tau_t * sdc[0].c); //sdpc = sd precalculated per atom values //sdpc.x = 1/sqrt(m)*sig[0].Yv //sdpc.y = 1/sqrt(m)*sig[0].V float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd2X<>, //This is calculated in sd2 of the previous step float4 posq<>, float3 f<>, float3 v<>, float invmass<>, out float3 sd1V<>, //This is V in gromacs, reused in sd2 out float3 vnew<>, out float4 posqp<> ){ float3 Vmh; float3 fg1, fg2; float linind_gauss; //linear index into the random numbers float2 igauss; //2d index igauss = indexof( posq ); linind_gauss = igauss.y * xstrwidth + igauss.x; //2 random vectors used in each fragment linind_gauss = 2 * linind_gauss + goffset; igauss.y = floor( linind_gauss / gstrwidth ); igauss.x = linind_gauss - igauss.y * gstrwidth; fg1 = fgauss[ igauss ]; linind_gauss += 1.0f; igauss.y = floor( linind_gauss / gstrwidth ); igauss.x = linind_gauss - igauss.y * gstrwidth; fg2 = fgauss[ igauss ]; Vmh = sd2X * pc3 + sdpc.x * fg1; sd1V = sdpc.y * fg2; vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem; posqp.w = posq.w; posqp.xyz = posq.xyz + vnew * pc2; } */ /*Second part of sd update, after first constraints call*/ /* kernel void kupdate_sd2( float xstrwidth, //stream width of atoms float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers float pc1, //this is 1/pc2 for sd1 //= 1/(tau_t*(sdc[0].eph-sdc[0].emh)) float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1) //per atom precalcs //sdpc.x = 1/sqrt(m)*sig[0].Yx //sdpc.y = 1/sqrt(m)*sig[0].X float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd1V<>, //calculated in sd1 float4 posq<>, //positions pre-update float4 posqp<>, //positions post-constraint float3 vnew<>, //velocities after sd1 out float3 sd2X<>, //Used in sd1 out float3 v<>, //velocities corrected for constraints out float4 posqp2<> //Will need to be constrained again ){ float linind_gauss; //linear index into the random numbers float2 igauss; //2d index float3 fg1, fg2; float3 Xmh; igauss = indexof( posq ); linind_gauss = igauss.y * xstrwidth + igauss.x; linind_gauss = 2 * linind_gauss + goffset; igauss.y = floor( linind_gauss / gstrwidth ); igauss.x = linind_gauss - igauss.y * gstrwidth; fg1 = fgauss[ igauss ]; linind_gauss += 1.0f; igauss.y = floor( linind_gauss / gstrwidth ); igauss.x = linind_gauss - igauss.y * gstrwidth; fg2 = fgauss[ igauss ]; v = pc1 * ( posqp.xyz - posq.xyz ); Xmh = sd1V * pc2 + sdpc.x * fg1; sd2X = sdpc.y * fg2; posqp2 = posqp; posqp2.xyz += sd2X - Xmh; } */ //Applies a permutation to the given random vector stream //Totally bandwidth bound and streams are large, should be done infrequently. //Probably don't want gvin and gvout to be the same stream! kernel void kpermute_vectors( float gstrwidth, float perm<>, float3 gvin[][], out float3 gvout<> ) { float2 ind; //ind.y = floor( perm / gstrwidth ); ind.y = round( (perm - fmod( perm, gstrwidth ))/gstrwidth ); ind.x = perm - ind.y * gstrwidth; gvout = gvin[ ind ]; } /* * Alternative formulation to handle precision better * In updatesd1, we output only deltaV's. The shake * kernel uses the deltaV's and the X's to output * a bunch of constrained deltax's * */ kernel void kupdate_sd1_fix1( float xstrwidth, //atom stream width float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers //sd constants, prefixed with c //to avoid 1 letter names, cxx = sdc[0].xx float cem, //sd precalculated constants float pc1, //tau_t[0] * ( 1 - sdc[0].em ) float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh) float pc3, //sdc[0].d/(tau_t * sdc[0].c); //sdpc = sd precalculated per atom values //sdpc.x = 1/sqrt(m)*sig[0].Yv //sdpc.y = 1/sqrt(m)*sig[0].V float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd2X<>, //This is calculated in sd2 of the previous step float4 posq<>, float3 f<>, float3 v<>, float invmass<>, out float3 sd1V<>, //This is V in gromacs, reused in sd2 out float3 vnew<>, out float4 posqp<> ){ float3 Vmh; float3 fg1, fg2; float linind_gauss; //linear index into the random numbers float2 igauss; //2d index igauss = indexof( posq ); linind_gauss = igauss.y * xstrwidth + igauss.x; //2 random vectors used in each fragment linind_gauss = 2 * linind_gauss + goffset; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg1 = fgauss[ igauss ]; linind_gauss += 1.0f; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg2 = fgauss[ igauss ]; /* Gromacs code n = 1 -> ngtc sdc[n].gdt = dt/tau_t[n]; sdc[n].eph = exp(sdc[n].gdt/2); sdc[n].emh = exp(-sdc[n].gdt/2); sdc[n].em = exp(-sdc[n].gdt); sdc[n].b = sdc[n].gdt*(sqr(sdc[n].eph)-1) - 4*sqr(sdc[n].eph-1); // 2.15 in paper (C) sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em; sdc[n].d = 2 - sdc[n].eph - sdc[n].emh; Vmh = X[n-start][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c) + ism*sig[gt].Yv*fgauss(&jran); V[n-start][d] = ism*sig[gt].V*fgauss(&jran); v[n][d] = vn*sdc[gt].em + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em) + V[n-start][d] - sdc[gt].em*Vmh; xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh); */ // --------------------------------------------------------------------------------------------- // float pc3 = sdc[0].d/(tau_t * sdc[0].c); //sdpc.x = 1/sqrt(m)*sig[0].Yv Vmh = sd2X * pc3 + sdpc.x * fg1; // --------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------- //sdpc.y = 1/sqrt(m)*sig[0].V sd1V = sdpc.y * fg2; // --------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------- // // float pc1 = tau_t[0] * ( 1 - sdc[0].em ) vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem; // float pc2 = tau_t[gt] * (sdc[gt].eph - sdc.emh) posqp = posq; posqp.xyz += vnew * pc2; } kernel void kupdate_sd2_fix1( float xstrwidth, //stream width of atoms float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers float pc1, //this is 1/pc2 for sd1 //= 1/(tau_t*(sdc[0].eph-sdc[0].emh)) float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1) //per atom precalcs //sdpc.x = 1/sqrt(m)*sig[0].Yx //sdpc.y = 1/sqrt(m)*sig[0].X float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd1V<>, //calculated in sd1 float4 posq<>, //positions pre-update float4 posqp<>, //deltas post constraint float3 vnew<>, //velocities after sd1 out float3 sd2X<>, //Used in sd1 out float3 v<>, //velocities corrected for constraints out float4 posqp2<> //new deltas, need to be constrained again ){ float linind_gauss; //linear index into the random numbers float2 igauss; //2d index float3 fg1, fg2; float3 Xmh; igauss = indexof( posq ); linind_gauss = igauss.y * xstrwidth + igauss.x; linind_gauss = 2 * linind_gauss + goffset; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg1 = fgauss[ igauss ]; linind_gauss += 1.0f; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg2 = fgauss[ igauss ]; //v = pc1 * posqp.xyz (!!!!!!!! ?????? !!!!!!!) v = pc1 * (posqp.xyz - posq.xyz); Xmh = sd1V * pc2 + sdpc.x * fg1; sd2X = sdpc.y * fg2; posqp2 = posqp; posqp2.xyz += sd2X - Xmh; } kernel void kupdate_sd1_fix1_FixedRV( float xstrwidth, //atom stream width float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers //sd constants, prefixed with c //to avoid 1 letter names, cxx = sdc[0].xx float cem, //sd precalculated constants float pc1, //tau_t[0] * ( 1 - sdc[0].em ) float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh) float pc3, //sdc[0].d/(tau_t * sdc[0].c); //sdpc = sd precalculated per atom values //sdpc.x = 1/sqrt(m)*sig[0].Yv //sdpc.y = 1/sqrt(m)*sig[0].V float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd2X<>, //This is calculated in sd2 of the previous step float4 posq<>, float3 f<>, float3 v<>, float invmass<>, out float3 sd1V<>, //This is V in gromacs, reused in sd2 out float3 vnew<>, out float4 posqp<> ){ float3 Vmh; float3 fg1, fg2; float linind_gauss; //linear index into the random numbers float2 igauss; //2d index float RandomValueVsp = 0.1f; igauss = indexof( posq ); linind_gauss = igauss.y * xstrwidth + igauss.x; //2 random vectors used in each fragment linind_gauss = 2 * linind_gauss + goffset; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; //fg1 = fgauss[ igauss ]; fg1 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp ); linind_gauss += 1.0f; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; //fg2 = fgauss[ igauss ]; fg2 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp ); /* Gromacs code n = 1 -> ngtc sdc[n].gdt = dt/tau_t[n]; sdc[n].eph = exp(sdc[n].gdt/2); sdc[n].emh = exp(-sdc[n].gdt/2); sdc[n].em = exp(-sdc[n].gdt); sdc[n].b = sdc[n].gdt*(sqr(sdc[n].eph)-1) - 4*sqr(sdc[n].eph-1); // 2.15 in paper (C) sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em; sdc[n].d = 2 - sdc[n].eph - sdc[n].emh; Vmh = X[n-start][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c) + ism*sig[gt].Yv*fgauss(&jran); V[n-start][d] = ism*sig[gt].V*fgauss(&jran); v[n][d] = vn*sdc[gt].em + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em) + V[n-start][d] - sdc[gt].em*Vmh; xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh); */ // --------------------------------------------------------------------------------------------- // float pc3 = sdc[0].d/(tau_t * sdc[0].c); //sdpc.x = 1/sqrt(m)*sig[0].Yv Vmh = sd2X * pc3 + sdpc.x * fg1; // --------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------- //sdpc.y = 1/sqrt(m)*sig[0].V sd1V = sdpc.y * fg2; // --------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------- // // float pc1 = tau_t[0] * ( 1 - sdc[0].em ) vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem; // float pc2 = tau_t[gt] * (sdc[gt].eph - sdc.emh) posqp = posq; posqp.xyz += vnew * pc2; } kernel void kupdate_sd2_fix1_FixedRV( float xstrwidth, //stream width of atoms float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers float pc1, //this is 1/pc2 for sd1 //= 1/(tau_t*(sdc[0].eph-sdc[0].emh)) float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1) //per atom precalcs //sdpc.x = 1/sqrt(m)*sig[0].Yx //sdpc.y = 1/sqrt(m)*sig[0].X float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd1V<>, //calculated in sd1 float4 posq<>, //positions pre-update float4 posqp<>, //deltas post constraint float3 vnew<>, //velocities after sd1 out float3 sd2X<>, //Used in sd1 out float3 v<>, //velocities corrected for constraints out float4 posqp2<> //new deltas, need to be constrained again ){ float linind_gauss; //linear index into the random numbers float2 igauss; //2d index float3 fg1, fg2; float3 Xmh; float RandomValueVsp = 0.1f; igauss = indexof( posq ); linind_gauss = igauss.y * xstrwidth + igauss.x; linind_gauss = 2 * linind_gauss + goffset; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; //fg1 = fgauss[ igauss ]; fg1 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp ); linind_gauss += 1.0f; //igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; // fg2 = fgauss[ igauss ]; fg2 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp ); //v = pc1 * posqp.xyz (!!!!!!!! ?????? !!!!!!!) v = pc1 * (posqp.xyz - posq.xyz); Xmh = sd1V * pc2 + sdpc.x * fg1; sd2X = sdpc.y * fg2; posqp2 = posqp; posqp2.xyz += sd2X - Xmh; }