/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2008-2009 Stanford University and the Authors. * * Authors: Peter Eastman, Mark Friedrichs, Chris Bruns, Mike Houston * * Contributors: * * * * Permission is hereby granted, free of charge, to any person obtaining a * * copy of this software and associated documentation files (the "Software"), * * to deal in the Software without restriction, including without limitation * * the rights to use, copy, modify, merge, publish, distribute, sublicense, * * and/or sell copies of the Software, and to permit persons to whom the * * Software is furnished to do so, subject to the following conditions: * * * * The above copyright notice and this permission notice shall be included in * * all copies or substantial portions of the Software. * * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * * USE OR OTHER DEALINGS IN THE SOFTWARE. * * -------------------------------------------------------------------------- */ //Gather kernel for use with diferent angle, dihedral and improper functions // //For small systems, the overhead of calling the kernel is so high //That we have to minimize the number of kernel calls. At the same //time we don't want to waste reads, so I'm providing a bunch of kernels //here that are unrolled to diferent extents. Use the appropriate one //depending on how many inverse maps there are, rather than looping over //the simple one. //helper function to make the unrolling look better kernel float3 do_gather( float strwidth, float4 invmap<>, float3 forces[][] ) { float3 f; float4 quotient, remainder; float2 idx; f = float3( 0.0f, 0.0f, 0.0f ); //Convert from linear to 2D index // quotient = floor( invmap / strwidth ); quotient = round( ( invmap - fmod(invmap, strwidth))/strwidth ); remainder = invmap - quotient * strwidth; //Add each force only if non-negative if ( invmap.x >= 0.0f ) { idx.y = quotient.x; idx.x = remainder.x; f = forces[ idx ]; } if ( invmap.y >= 0.0f ) { idx.y = quotient.y; idx.x = remainder.y; f += forces[ idx ]; } if ( invmap.z >= 0.0f ) { idx.y = quotient.z; idx.x = remainder.z; f += forces[ idx ]; } if ( invmap.w >= 0.0f ) { idx.y = quotient.w; idx.x = remainder.w; f += forces[ idx ]; } return f; } //helper function to make the unrolling look better kernel float3 do_gather_nobranch( float strwidth, float4 invmap<>, float3 forces[][] ) { float3 f1, f2, f3, f4, f, z, t; float4 quotient, remainder; float2 idx; float m1, m2, m3, m4; z = float3( 0.0f, 0.0f, 0.0f ); f=z; //Convert from linear to 2D index // quotient = floor( invmap / strwidth ); quotient = round( ( invmap - fmod(invmap, strwidth))/strwidth ); remainder = invmap - quotient * strwidth; m1 = invmap.x; m2 = invmap.y; m3 = invmap.z; m4 = invmap.w; //Add each force only if non-negative idx.y = quotient.x; idx.x = remainder.x; f1 = forces[ idx ]; idx.y = quotient.y; idx.x = remainder.y; f2 = forces[ idx ]; idx.y = quotient.z; idx.x = remainder.z; f3 = forces[ idx ]; idx.y = quotient.w; idx.x = remainder.w; f4 = forces[ idx ]; f = (m1 >= 0.0f) ? f1 : z; t = (m2 >= 0.0f) ? f2 : z; f = f+t; t = (m3 >= 0.0f) ? f3 : z; f = f+t; t = (m4 >= 0.0f) ? f4 : z; f = f+t; return f; } //Simple version, takes only one index stream kernel void kinvmap_gather( float strwidth, //stream width of the dihedral forces float4 invmap<>, //indices into the dihedral forces float3 forces[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap, forces ); } //Takes two inverse maps kernel void kinvmap_gather2( float strwidth, //stream width of the dihedral forces float4 invmap1<>, //indices into the dihedral forces float4 invmap2<>, //indices into the dihedral forces float3 forces[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap1, forces ); outforce += do_gather_nobranch( strwidth, invmap2, forces ); } //Takes three inverse maps kernel void kinvmap_gather3( float strwidth, //stream width of the dihedral forces float4 invmap1<>, //indices into the dihedral forces float4 invmap2<>, //indices into the dihedral forces float4 invmap3<>, float3 forces[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap1, forces ); outforce += do_gather_nobranch( strwidth, invmap2, forces ); outforce += do_gather_nobranch( strwidth, invmap3, forces ); } //Takes four inverse maps kernel void kinvmap_gather4( float strwidth, //stream width of the dihedral forces float4 invmap1<>, //indices into the dihedral forces float4 invmap2<>, //indices into the dihedral forces float4 invmap3<>, float4 invmap4<>, float3 forces[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap1, forces ); outforce += do_gather_nobranch( strwidth, invmap2, forces ); outforce += do_gather_nobranch( strwidth, invmap3, forces ); outforce += do_gather_nobranch( strwidth, invmap4, forces ); } //Takes five inverse maps kernel void kinvmap_gather5( float strwidth, //stream width of the dihedral forces float4 invmap1<>, //indices into the dihedral forces float4 invmap2<>, //indices into the dihedral forces float4 invmap3<>, float4 invmap4<>, float4 invmap5<>, float3 forces[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap1, forces ); outforce += do_gather_nobranch( strwidth, invmap2, forces ); outforce += do_gather_nobranch( strwidth, invmap3, forces ); outforce += do_gather_nobranch( strwidth, invmap4, forces ); outforce += do_gather_nobranch( strwidth, invmap5, forces ); } //Takes six inverse maps - this is the last one! kernel void kinvmap_gather6( float strwidth, //stream width of the dihedral forces float4 invmap1<>, //indices into the dihedral forces float4 invmap2<>, //indices into the dihedral forces float4 invmap3<>, float4 invmap4<>, float4 invmap5<>, float4 invmap6<>, float3 forces[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap1, forces ); outforce += do_gather_nobranch( strwidth, invmap2, forces ); outforce += do_gather_nobranch( strwidth, invmap3, forces ); outforce += do_gather_nobranch( strwidth, invmap4, forces ); outforce += do_gather_nobranch( strwidth, invmap5, forces ); outforce += do_gather_nobranch( strwidth, invmap6, forces ); } //Takes three + four inverse maps kernel void kinvmap_gather2_2( float strwidth, //stream width of the dihedral forces float4 invmap3_1<>, //indices into the dihedral forces float4 invmap3_2<>, //indices into the dihedral forces float3 forces3[][], //dihedral forces float4 invmap4_1<>, //indices into the dihedral forces float4 invmap4_2<>, //indices into the dihedral forces float3 forces4[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 ); outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 ); outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 ); } //Takes three + four inverse maps kernel void kinvmap_gather3_3( float strwidth, //stream width of the dihedral forces float4 invmap3_1<>, //indices into the dihedral forces float4 invmap3_2<>, //indices into the dihedral forces float4 invmap3_3<>, float3 forces3[][], //dihedral forces float4 invmap4_1<>, //indices into the dihedral forces float4 invmap4_2<>, //indices into the dihedral forces float4 invmap4_3<>, float3 forces4[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 ); outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 ); outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 ); outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 ); } //Takes three + four inverse maps kernel void kinvmap_gather3_4( float strwidth, //stream width of the dihedral forces float4 invmap3_1<>, //indices into the dihedral forces float4 invmap3_2<>, //indices into the dihedral forces float4 invmap3_3<>, float3 forces3[][], //dihedral forces float4 invmap4_1<>, //indices into the dihedral forces float4 invmap4_2<>, //indices into the dihedral forces float4 invmap4_3<>, float4 invmap4_4<>, float3 forces4[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 ); outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 ); outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 ); outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 ); } //Takes three + five inverse maps kernel void kinvmap_gather3_5( float strwidth, //stream width of the dihedral forces float4 invmap3_1[][], //indices into the dihedral forces float4 invmap3_2[][], //indices into the dihedral forces float4 invmap3_3[][], float3 forces3[][], //dihedral forces float4 invmap5_1[][], //indices into the dihedral forces float4 invmap5_2[][], //indices into the dihedral forces float4 invmap5_3[][], float4 invmap5_4[][], float4 invmap5_5[][], float3 forces5[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { float2 idx = indexof(outforce); outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 ); outforce += do_gather_nobranch( strwidth, invmap3_2[idx], forces3 ); outforce += do_gather_nobranch( strwidth, invmap3_3[idx], forces3 ); outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_2[idx], forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_3[idx], forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_4[idx], forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_5[idx], forces5 ); } //Takes one + one inverse maps kernel void kinvmap_gather1_1( float strwidth, //stream width of the dihedral forces float4 invmap3_1[][], //indices into the dihedral forces float3 forces3[][], //dihedral forces float4 invmap5_1[][], //indices into the dihedral forces float3 forces5[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { float2 idx = indexof(outforce); outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 ); outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 ); } //Takes five + two inverse maps kernel void kinvmap_gather5_2( float strwidth, //stream width of the dihedral forces float4 invmap5_1<>, //indices into the dihedral forces float4 invmap5_2<>, //indices into the dihedral forces float4 invmap5_3<>, float4 invmap5_4<>, float4 invmap5_5<>, float3 forces5[][], //dihedral forces float4 invmap2_1<>, //indices into the dihedral forces float4 invmap2_2<>, //indices into the dihedral forces float3 forces2[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 ); outforce += do_gather_nobranch( strwidth, invmap5_5, forces5 ); outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 ); outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 ); } //Takes five + two inverse maps kernel void kinvmap_gather4_2( float strwidth, //stream width of the dihedral forces float4 invmap4_1<>, //indices into the dihedral forces float4 invmap4_2<>, //indices into the dihedral forces float4 invmap4_3<>, float4 invmap4_4<>, float3 forces4[][], //dihedral forces float4 invmap2_1<>, //indices into the dihedral forces float4 invmap2_2<>, //indices into the dihedral forces float3 forces2[][], //dihedral forces float3 inforce<>, //particle forces before out float3 outforce<> //particle forces after ) { outforce = inforce; outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 ); outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 ); outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 ); outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 ); } kernel float3 etch_force( float fpos, float strwidth, float3 fi[][], float3 fj[][], float3 fk[][], float3 fl[][] ) { float2 ind; float _fpos; _fpos = fpos; if ( _fpos > 300000.0f ) { _fpos = _fpos - 300000.0f; //ind.y = floor( _fpos / strwidth ); ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth ); ind.x = _fpos - ind.y * strwidth; return fl[ ind ]; } else if ( _fpos > 200000.0f ) { _fpos = _fpos - 200000.0f; //ind.y = floor( _fpos / strwidth ); ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth ); ind.x = _fpos - ind.y * strwidth; return fk[ ind ]; } else if ( _fpos > 100000.0f ) { _fpos = _fpos - 100000.0f; //ind.y = floor( _fpos / strwidth ); ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth ); ind.x = _fpos - ind.y * strwidth; return fj[ ind ]; } else if ( _fpos >= -0.5f ) { //ind.y = floor( _fpos / strwidth ); ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth ); ind.x = _fpos - ind.y * strwidth; return fi[ ind ]; } else return 0.0f; } //For-loop version doesn't work //Using a merged version of the above kernel float2 linear_to_2D( float linind, float width ) { float2 ind; //ind.y = floor( linind / width ); ind.y = round( ( linind - fmod( linind, width))/width ); ind.x = linind - ind.y * width; return ind; } //helper function to make the unrolling look better kernel float3 do_gather_merged_single( float strwidth, float invmap, float3 fi[][], float3 fj[][], float3 fk[][], float3 fl[][] ) { float3 f; float2 idx; float _invmap; float n; _invmap = invmap; n = floor( _invmap / 100000.0f ); _invmap -= n * 100000.0f; idx = linear_to_2D( _invmap, strwidth ); if ( n > 2.5f ) { f = fl[ idx ]; } else if ( n > 1.5f ) { f = fk[ idx ]; } else if ( n > 0.5f ) { f = fj[ idx ]; } else if( n > -0.5f ) { f = fi[ idx ]; } return f; } kernel float3 do_gather_merged( float strwidth, float4 invmap, float3 fi[][], float3 fj[][], float3 fk[][], float3 fl[][]) { float3 f; f = do_gather_merged_single( strwidth, invmap.x, fi, fj, fk, fl ) + do_gather_merged_single( strwidth, invmap.y, fi, fj, fk, fl ) + do_gather_merged_single( strwidth, invmap.z, fi, fj, fk, fl ) + do_gather_merged_single( strwidth, invmap.w, fi, fj, fk, fl ); return f; } kernel void kinvmap_gather_merged5( float natoms, //number of atoms float strwidth, //stream width of out-of-order forces float4 invmap0<>, float4 invmap1<>, float4 invmap2<>, float4 invmap3<>, float4 invmap4<>, float3 fi[][], //i-forces float3 fj[][], //j-forces float3 fk[][], //k-forces float3 fl[][], //l-forces float3 inforce<>, out float3 outforce<> ) { outforce = inforce; outforce += do_gather_merged( strwidth, invmap0, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap1, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap2, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap3, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap4, fi, fj, fk, fl ); } kernel void kinvmap_gather_merged9( float natoms, //number of atoms float strwidth, //stream width of out-of-order forces float4 invmap0<>, float4 invmap1<>, float4 invmap2<>, float4 invmap3<>, float4 invmap4<>, float4 invmap5<>, float4 invmap6<>, float4 invmap7<>, float4 invmap8<>, float3 fi[][], //i-forces float3 fj[][], //j-forces float3 fk[][], //k-forces float3 fl[][], //l-forces float3 inforce<>, out float3 outforce<> ) { outforce = inforce; outforce += do_gather_merged( strwidth, invmap0, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap1, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap2, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap3, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap4, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap5, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap6, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap7, fi, fj, fk, fl ) + do_gather_merged( strwidth, invmap8, fi, fj, fk, fl ); }