/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Mark Friedrichs, Mike Houston                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

/* After forces above, we have the forces for even numbered particles
 * in one stream, odd numbered particles in another.
 * In each stream, the forces are in several parts depending on how
 * many times we replicated the input stream.
 *
 * To avoid an extra kernel to zero forces, this sets the forces
 * rather than adding to it.
 *
 * Scalar (float) variant; the output position comes from the iterator
 * stream 'count' instead of indexof.
 *
 *   repfac           number of partial sums gathered per output element
 *   atomStrWidth     width used to turn the 2D output position into a
 *                    linear index
 *   pstreamStrWidth  width used to turn a linear partial-stream index
 *                    back into a 2D gather address
 *   natoms, iUnroll  the gather index advances by natoms/iUnroll per pass;
 *                    iUnroll also selects the source stream (remainder of
 *                    the linear index)
 *   count            iterator stream giving the (x, y) output position
 *   pstream1         gathered when the remainder is <= 0.5
 *   pstream2         gathered when the remainder is  > 0.5
 *   outstream        receives the sum of the repfac gathered values
 */
kernel void kMergeFloat( float repfac, float atomStrWidth, float pstreamStrWidth,
                         float natoms, float iUnroll,
                         iter float2 count<>,
                         float pstream1[][], float pstream2[][],
                         out float outstream<> ) {

   float linind;
   float2 pindex;
   float odd;
   float i;
   float quotient;
   float stride;

   // convert to linear atom index
   linind = count.x + count.y * atomStrWidth;

   // Split the linear index into quotient and remainder w.r.t. iUnroll.
   // Uses the same rounded fmod form as kAddAndMergeFloat4 and the
   // 4-stream kernels, which replaced floor( linind / iUnroll )
   // (presumably to guard against an inexact float quotient -- confirm).
   quotient = round( (linind - fmod( linind, iUnroll ))/iUnroll );

   // If odd or even, we pick from different streams.
   odd = linind - quotient * iUnroll;

   // Now linear index is the index into partial_streams
   linind = quotient;

   // distance between successive partial sums; constant across the loop
   stride = natoms/iUnroll;

   outstream = 0.0f;

   // If we have predicated conditionals, we should
   // keep the conditional inside the loop
   for ( i = 0.0f; i < repfac; i += 1.0f ) {
      pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth );
      pindex.x = linind - pindex.y * pstreamStrWidth;
      if ( odd > 0.5f ) { // is odd
         outstream += pstream2[ pindex ];
      } else {
         outstream += pstream1[ pindex ];
      }
      linind += stride;
   }
}

/* float4 variant of kMergeFloat.
 *
 * The output position is taken from indexof( outstream ) instead of an
 * iterator stream; otherwise the parameters have the same roles:
 *
 *   repfac           number of partial sums gathered per output element
 *   atomStrWidth     width used to linearize the 2D output position
 *   pstreamStrWidth  width used to build the 2D gather address
 *   natoms, iUnroll  the gather index advances by natoms/iUnroll per pass;
 *                    the remainder w.r.t. iUnroll selects the stream
 *   pstream1         gathered when the remainder is <= 0.5
 *   pstream2         gathered when the remainder is  > 0.5
 *   outstream        receives the component-wise sum of the gathered values
 */
kernel void kMergeFloat4( float repfac, float atomStrWidth, float pstreamStrWidth,
                          float natoms, float iUnroll,
                          float4 pstream1[][], float4 pstream2[][],
                          out float4 outstream<> ) {

   float linind;
   float2 pindex;
   float odd;
   float i;
   float quotient;
   float stride;

   // convert to linear atom index
   linind = (indexof outstream).x + ( (indexof outstream).y * atomStrWidth );

   // Quotient/remainder split, in the same rounded fmod form that
   // kAddAndMergeFloat4 uses for the identical computation.
   quotient = round( (linind - fmod( linind, iUnroll ))/iUnroll );

   // If odd or even, we pick from different streams.
   odd = linind - quotient * iUnroll;

   // Now linear index is the index into partial_streams
   linind = quotient;

   // distance between successive partial sums; constant across the loop
   stride = natoms/iUnroll;

   outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );

   // If we have predicated conditionals, we should
   // keep the conditional inside the loop
   for ( i = 0.0f; i < repfac; i += 1.0f ) {
      pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth );
      pindex.x = linind - pindex.y * pstreamStrWidth;
      if ( odd > 0.5f ) { // is odd
         outstream += pstream2[ pindex ];
      } else {
         outstream += pstream1[ pindex ];
      }
      linind += stride;
   }
}

/* After forces above, we have the forces for even numbered particles
 * in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
 * many times we replicated the input stream.
 *
 * To avoid an extra kernel to zero forces, this sets the forces
 * rather than adding to it.
 *
 * NOTE(review): as written, kMergeFloat4_4X does NOT gather from the
 * pstreams. The gather is disabled and each pass accumulates the index
 * values ( linind, odd, pindex.x, pindex.y ) into outstream instead.
 * This looks like leftover index-debugging output; confirm before using
 * this kernel to produce forces. kMergeFloat4_4 below performs the
 * actual four-stream merge.
 */
kernel void kMergeFloat4_4X( float repfac, float atomStrWidth, float pstreamStrWidth,
                             float natoms, float iUnroll,
                             float4 pstream1[][], float4 pstream2[][],
                             float4 pstream3[][], float4 pstream4[][],
                             out float4 outstream<> ) {

   float linind;
   float2 pindex;
   float odd;
   float i;
   float stride;

   // convert to linear atom index
   linind = (indexof outstream).x + ( (indexof outstream).y * atomStrWidth );

   // remainder w.r.t. iUnroll: which of the partial streams would be used
   odd = linind - floor( linind / iUnroll ) * iUnroll;

   // Now linear index is the index into partial_streams
   linind = floor( linind / iUnroll );

   // distance between successive partial sums; constant across the loop
   stride = natoms/iUnroll;

   outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );

   for ( i = 0.0f; i < repfac; i += 1.0f ) {

      pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth );
      pindex.x = linind - pindex.y * pstreamStrWidth;

      // diagnostic output: the indices computed for this pass
      outstream += float4( linind, odd, pindex.x, pindex.y );

      // Disabled gather (kept for reference):
      //    odd < 0.5  ->  outstream += pstream1[ pindex ]
      //    odd < 1.5  ->  outstream += pstream2[ pindex ]
      //    odd < 2.5  ->  outstream += pstream3[ pindex ]
      //    otherwise  ->  outstream += pstream4[ pindex ]

      linind += stride;
   }
}

/* Merge partial forces held in four streams into one float4 per atom.
 *
 * For each of the repfac passes the running force index is split into
 *    qIndex = forceIndex div iUnroll   (position inside the partial streams)
 *    qOff   = forceIndex mod iUnroll   (which partial stream to read)
 * and qIndex is converted to a 2D gather address using pStreamWidth.
 * The force index then advances by roundNatoms.
 *
 *   repfac           number of partial sums gathered per output element
 *   atomStreamWidth  width used to linearize the 2D output position
 *   pStreamWidth     width used to build the 2D gather address
 *   natoms           not referenced by this kernel
 *   roundNatoms      increment of the force index between passes
 *   iUnroll          divisor used for the qIndex/qOff split
 *   pstream1..4      sources for qOff < 0.5, < 1.5, < 2.5, and otherwise
 *   outstream        receives the component-wise sum of the gathered values
 */
kernel void kMergeFloat4_4( float repfac, float atomStreamWidth, float pStreamWidth,
                            float natoms, float roundNatoms, float iUnroll,
                            float4 pstream1[][], float4 pstream2[][],
                            float4 pstream3[][], float4 pstream4[][],
                            out float4 outstream<> ) {

   float forceIndex, qIndex, qOff;
   float2 pindex;
   float i;

   // linear atom index of this output element is the first force index
   pindex     = indexof( outstream );
   forceIndex = pindex.x + pindex.y*atomStreamWidth;

   outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );

   for( i = 0.0f; i < repfac; i += 1.0f ){

      // integer quotient/remainder, written in rounded fmod form
      qIndex = round( (forceIndex - fmod( forceIndex, iUnroll ))/iUnroll );
      qOff   = forceIndex - iUnroll*qIndex;

      // 2D gather address of qIndex
      pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
      pindex.x = qIndex - pindex.y*pStreamWidth;

      if( qOff < 0.5f ){
         outstream += pstream1[ pindex ];
      } else if( qOff < 1.5f ){
         outstream += pstream2[ pindex ];
      } else if( qOff < 2.5f ){
         outstream += pstream3[ pindex ];
      } else {
         outstream += pstream4[ pindex ];
      }

      forceIndex += roundNatoms;
   }
}

/* Set every component of each float4 stream element to 'value'. */
kernel void kSetValue4( float value, out float4 outstream<> ){
   outstream = float4( value, value, value, value );
}

/* Set every component of each float3 stream element to 'value'. */
kernel void kSetValue3( float value, out float3 outstream<> ){
   outstream = float3( value, value, value );
}

/* Set both components of each float2 stream element to 'value'. */
kernel void kSetValue2( float value, out float2 outstream<> ){
   outstream = float2( value, value );
}

/* Set each float stream element to 'value'. */
kernel void kSetValue1( float value, out float outstream<> ){
   outstream = value;
}

/* Write the index arithmetic for each output element, for inspection:
 *
 *    outstream = ( x, y, forceIndex, atomIndex )
 *
 * where (x, y) = indexof( outstream ),
 *       forceIndex = unroll*( x + y*pstreamStrWidth ),
 *       atomIndex  = fmod( forceIndex, natoms ).
 *
 * atomStrWidth is not referenced by this kernel.
 */
kernel void kCheck( float natoms, float atomStrWidth, float pstreamStrWidth, float unroll,
                    out float4 outstream<> ) {

   float forceIndex, atomIndex;
   float2 pindex;

   pindex     = indexof( outstream );
   forceIndex = unroll*(pindex.x + pindex.y*pstreamStrWidth);
   atomIndex  = fmod( forceIndex, natoms );

   outstream  = float4( pindex.x, pindex.y, forceIndex, atomIndex );
}

/* After forces above, we have the forces for even numbered particles
 * in one stream, odd numbered particles in another.
 * In each stream, the forces are in several parts depending on how
 * many times we replicated the input stream.
 *
 * To avoid an extra kernel to zero forces, this sets the forces
 * rather than adding to it.
*
 * NOTE: unlike the plain merge kernels, kAddAndMergeFloat4 does not start
 * from zero. The output is initialized from inStream (with .w reset to
 * 0), and the partial sums gathered from the pstreams are added on top.
 *
 *   repfac           number of partial sums gathered per output element
 *   atomStrWidth     width used to linearize the 2D output position
 *   pstreamStrWidth  width used to build the 2D gather address
 *   natoms, iUnroll  the gather index advances by natoms/iUnroll per pass;
 *                    the remainder w.r.t. iUnroll selects the stream
 *   inStream         values the gathered sums are added to
 *   pstream1         gathered when the remainder is <= 0.5
 *   pstream2         gathered when the remainder is  > 0.5
 *   outstream        inStream (w cleared) plus the gathered values
 */
kernel void kAddAndMergeFloat4( float repfac, float atomStrWidth, float pstreamStrWidth,
                                float natoms, float iUnroll,
                                float4 inStream<>,
                                float4 pstream1[][], float4 pstream2[][],
                                out float4 outstream<> ) {

   float linind;
   float2 pindex;
   float odd;
   float i;
   float floor_linind_iUnroll;
   float stride;

   // convert to linear atom index
   linind = (indexof outstream).x + (indexof outstream).y * atomStrWidth;

   // integer quotient of linind by iUnroll, in rounded fmod form
   floor_linind_iUnroll = round( (linind - fmod( linind, iUnroll ))/iUnroll );

   // If odd or even, we pick from different streams.
   odd = linind - floor_linind_iUnroll * iUnroll;

   // Now linear index is the index into partial_streams
   linind = floor_linind_iUnroll;

   // distance between successive partial sums; constant across the loop
   stride = natoms/iUnroll;

   // start from the incoming values; .w is accumulated from the
   // pstreams only
   outstream   = inStream;
   outstream.w = 0.0f;

   // If we have predicated conditionals, we should
   // keep the conditional inside the loop
   for ( i = 0.0f; i < repfac; i += 1.0f ) {
      pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth );
      pindex.x = linind - pindex.y * pstreamStrWidth;
      if ( odd > 0.5f ) { // is odd
         outstream += pstream2[ pindex ];
      } else {
         outstream += pstream1[ pindex ];
      }
      linind += stride;
   }
}

/* Four-stream variant of kAddAndMergeFloat4.
 *
 * Adds the partial sums gathered from pstream1..4 to the values already
 * in inStream. For each of the repfac passes the running force index is
 * split into
 *    qIndex = forceIndex div iUnroll   (position inside the partial streams)
 *    qOff   = forceIndex mod iUnroll   (which partial stream to read)
 * and then advanced by roundNatoms.
 *
 *   repfac           number of partial sums gathered per output element
 *   atomStreamWidth  width used to linearize the 2D output position
 *   pStreamWidth     width used to build the 2D gather address
 *   natoms           not referenced by this kernel
 *   roundNatoms      increment of the force index between passes
 *   iUnroll          divisor used for the qIndex/qOff split
 *   inStream         values the gathered sums are added to
 *   pstream1..4      sources for qOff < 0.5, < 1.5, < 2.5, and otherwise
 *   outstream        inStream (w cleared) plus the gathered values
 */
kernel void kAddAndMergeFloat4_4( float repfac, float atomStreamWidth, float pStreamWidth,
                                  float natoms, float roundNatoms, float iUnroll,
                                  float4 inStream<>,
                                  float4 pstream1[][], float4 pstream2[][],
                                  float4 pstream3[][], float4 pstream4[][],
                                  out float4 outstream<> ){

   float forceIndex, qIndex, qOff;
   float2 pindex;
   float i;

   // linear atom index of this output element is the first force index
   pindex     = indexof( outstream );
   forceIndex = pindex.x + pindex.y*atomStreamWidth;

   // add current forces in inStream to forces stored in pstreams
   // the .w entry is Born sum values; it will be used to calculate the
   // Born radii and obcChain term, so inStream.w is discarded here and
   // .w is accumulated from the pstreams only
   outstream   = inStream;
   outstream.w = 0.0f;

   // sum over j-loop 'duplications' by gathering from pstreams
   for( i = 0.0f; i < repfac; i += 1.0f ){

      // integer quotient/remainder, written in rounded fmod form
      qIndex = round( (forceIndex - fmod( forceIndex, iUnroll ))/iUnroll );
      qOff   = forceIndex - iUnroll*qIndex;

      // 2D gather address of qIndex
      pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
      pindex.x = qIndex - pindex.y*pStreamWidth;

      if( qOff < 0.5f ){
         outstream += pstream1[ pindex ];
      } else if( qOff < 1.5f ){
         outstream += pstream2[ pindex ];
      } else if( qOff < 2.5f ){
         outstream += pstream3[ pindex ];
      } else {
         outstream += pstream4[ pindex ];
      }

      forceIndex += roundNatoms;
   }
}

/* Add forces from two streams:
 *    outForce = force1 + conversion*force2.xyz
 * The .w component of force2 is ignored.
 */
kernel void kAddForces3_4( float conversion, float3 force1<>, float4 force2<>,
                           out float3 outForce<> ){
   outForce.xyz = force1 + conversion*force2.xyz;
}

/* Copy one float4 stream to another. */
kernel void kCopyFloat4( float4 inForce<>, out float4 outForce<> ){
   outForce = inForce;
}

/* Copy a float3 stream into a float4 stream; the .w component of the
 * output is set to 0.
 */
kernel void kCopyFloat3To4( float3 inForce<>, out float4 outForce<> ){
   outForce.xyz = inForce;
   outForce.w   = 0.0f;
}