/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2009 Stanford University and the Authors. * * Authors: Mark Friedrichs, Mike Houston * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ //Applies a permutation to the given random vector stream //Totally bandwidth bound and streams are large, should be done infrequently. //Probably don't want gvin and gvout to be the same stream! kernel void kpermute_vectors( float gstrwidth, float perm<>, float3 gvin[][], out float3 gvout<> ){ float2 ind; //ind.y = floor( perm / gstrwidth ); ind.y = round( (perm - fmod( perm, gstrwidth ))/gstrwidth ); ind.x = perm - ind.y * gstrwidth; gvout = gvin[ ind ]; } /* Stochastic Integrator * * Since we don't have random numbers on the GPU, we * precalculate random numbers from a normal distribution * and store them in several large textures. Periodically * these will have to be refreshed on the CPU. * * Doing gathers instead of streams for the random vectors, * but the gathers should be fully coherent anyway. * This is just to avoid some * painful manipulations with multiple streams and domains. * */ /* First part before first constraint call * * * Many things can be precalculated here * If we become compute bound (wonder how), we can precalc stuff * * Assuming constant temperature. If you want to change the * temperature, you have to change all the precalculated * constants that use it. * */ /* * Alternative formulation to handle precision better * In updatesd1, we output only deltaV's. The shake * kernel uses the deltaV's and the X's to output * a bunch of constrained deltax's * */ kernel void kupdate_sd1_fix1( float xstrwidth, //atom stream width float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers //sd constants, prefixed with c //to avoid 1 letter names, cxx = sdc[0].xx float cem, //sd precalculated constants float pc1, //tau_t[0] * ( 1 - sdc[0].em ) float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh) float pc3, //sdc[0].d/(tau_t * sdc[0].c); //sdpc = sd precalculated per atom values //sdpc.x = 1/sqrt(m)*sig[0].Yv //sdpc.y = 1/sqrt(m)*sig[0].V float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd2X<>, //This is calculated in sd2 of the previous step float3 posq<>, float3 f<>, float3 v<>, float invmass<>, out float3 sd1V<>, //This is V in gromacs, reused in sd2 out float3 vnew<>, out float3 posqp<> ){ float c1; float3 Vmh; float3 fg1, fg2; float linind_gauss; //linear index into the random numbers float2 igauss; //2d index igauss = indexof( posq ).xy; linind_gauss = igauss.y * xstrwidth + igauss.x; //2 random vectors used in each fragment linind_gauss = 2.0f * linind_gauss + goffset; igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg1 = fgauss[ igauss ]; linind_gauss += 1.0f; igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg2 = fgauss[ igauss ]; //Vmh = (sd2X*pc3) + (sdpc.x*fg1); Vmh.x = pc3*sd2X.x + sdpc.x*fg1.x; Vmh.y = pc3*sd2X.y + sdpc.x*fg1.y; Vmh.z = pc3*sd2X.z + sdpc.x*fg1.z; //sd1V = sdpc.y*fg2; sd1V.x = sdpc.y*fg2.x; sd1V.y = sdpc.y*fg2.y; sd1V.z = sdpc.y*fg2.z; //vnew = (v*cem) + (invmass*f*pc1) + sd1V - (Vmh*cem); c1 = invmass*pc1; vnew.x = cem*(v.x - Vmh.x) + c1*f.x + sd1V.x; vnew.y = cem*(v.y - Vmh.y) + c1*f.y + sd1V.y; vnew.z = cem*(v.z - Vmh.z) + c1*f.z + sd1V.z; posqp.x = pc2*vnew.x; posqp.y = pc2*vnew.y; posqp.z = pc2*vnew.z; } kernel void kupdate_sd2_fix1( float xstrwidth, //stream width of atoms float gstrwidth, //stream width of random numbers float goffset, //starting offset for random numbers float pc1, //this is 1/pc2 for sd1 //= 1/(tau_t*(sdc[0].eph-sdc[0].emh)) float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1) //per atom precalcs //sdpc.x = 1/sqrt(m)*sig[0].Yx //sdpc.y = 1/sqrt(m)*sig[0].X float2 sdpc<>, //Normally distributed random vectors float3 fgauss[][], float3 sd1V<>, //calculated in sd1 float3 posq<>, //positions pre-update float3 posqp<>, //deltas post constraint float3 vnew<>, //velocities after sd1 out float3 sd2X<>, //Used in sd1 out float3 v<>, //velocities corrected for constraints out float3 posqp2<> //new deltas, need to be constrained again ){ float linind_gauss; //linear index into the random numbers float2 igauss; //2d index float3 fg1, fg2; float3 Xmh; igauss = indexof( posq ).xy; linind_gauss = igauss.y * xstrwidth + igauss.x; linind_gauss = 2.0f * linind_gauss + goffset; igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg1 = fgauss[ igauss ]; linind_gauss += 1.0f; igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth; fg2 = fgauss[ igauss ]; v.x = pc1*posqp.x; v.y = pc1*posqp.y; v.z = pc1*posqp.z; //Xmh = sd1V * pc2 + sdpc.x * fg1; Xmh.x = pc2*sd1V.x + sdpc.x*fg1.x; Xmh.y = pc2*sd1V.y + sdpc.x*fg1.y; Xmh.z = pc2*sd1V.z + sdpc.x*fg1.z; //sd2X = sdpc.y * fg2; sd2X.x = sdpc.y*fg2.x; sd2X.y = sdpc.y*fg2.y; sd2X.z = sdpc.y*fg2.z; posqp2 = posqp + sd2X - Xmh; }