/**************************************************************** * This file is part of the gpu acceleration library for gromacs. * Author: V. Vishal * Copyright (C) Pande Group, Stanford, 2006 *****************************************************************/ //Ryckaert Bellman dihedrals, needed for Amber/OPLS ff's // //Input is a stream of quartets i, j, k, l and the output is //four float3 streams fi, fj, fk, fl. //If by any chance this kernel becomes the bottleneck, we will //optimize, but for now, this is kept pretty simple. //To keep things streaming, we have a stream of 6 parameters(a float4 and float2) //for each dihedral. kernel void kpdih( float xstrwidth, //stream width for x float4 xq[][], //particle coordinates and charges float4 atoms<>, //ijkl quartets float4 parms<>, //parms = ( cp, phi0, mult, 0.0 ) out float3 fi<>, //output forces for i, j, k, l out float3 fj<>, out float3 fk<>, out float3 fl<> ) { float3 r_ij, r_kj, r_kl; float2 ai, aj, ak, al; float3 m, n; float sgnphi; float cosfac; float phi, ddphi, mdphi; float3 u, v, s; float nrkj, nrkj2, msq, nsq, cos_phi, sin_phi; //Convert from linear indices to 2D indices into x //If this kernel is compute bound, we can do this //conversion before-hand and feed in the 2D coordinates ai.y = floor( atoms.x / xstrwidth ); ai.x = atoms.x - ai.y * xstrwidth; aj.y = floor( atoms.y / xstrwidth ); aj.x = atoms.y - aj.y * xstrwidth; ak.y = floor( atoms.z / xstrwidth ); ak.x = atoms.z - ak.y * xstrwidth; al.y = floor( atoms.w / xstrwidth ); al.x = atoms.w - al.y * xstrwidth; r_ij = xq[ai].xyz - xq[aj].xyz; //3 r_kj = xq[ak].xyz - xq[aj].xyz; //3 r_kl = xq[ak].xyz - xq[al].xyz; //3 m = cross( r_ij, r_kj ); //9 n = cross( r_kj, r_kl ); //9 msq = dot(m, m); //5 nsq = dot(n, n); //5 cos_phi = clamp( dot(m, n)/sqrt(msq*nsq), -1.0, 1.0 ); //8 sgnphi = sign( dot( r_ij, n ) ); //5 phi = sgnphi * acos( cos_phi ); //2 mdphi = parms.z * phi - parms.y; //2 ddphi = - parms.x * parms.z * sin( mdphi ); //3 nrkj2 = dot( r_kj, r_kj ); //5 nrkj = sqrt( nrkj2 ); //1 fi = -ddphi * nrkj / msq * m; //5 fl = ddphi * nrkj / nsq * n; //5 u = dot( r_ij, r_kj ) / nrkj2 * fi; //9 v = dot( r_kl, r_kj ) / nrkj2 * fl; //9 s = u - v; //3 fj = s - fi; //3 fk = -(s + fl); //3 //Total : 100 flops }