/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2010 Stanford University and the Authors. * * Authors: Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include #include #include #include #include #include #include using namespace std; #include "gputypes.h" #include "cudatypes.h" #define LOCAL_HACK_PI 3.1415926535897932384626433832795 #define DOT3(v1, v2) (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z) #define CROSS_PRODUCT(v1, v2) make_float3(v1.y*v2.z - v1.z*v2.y, v1.z*v2.x - v1.x*v2.z, v1.x*v2.y - v1.y*v2.x) #define GETNORMEDDOTPRODUCT(v1, v2, dp) \ { \ dp = DOT3(v1, v2); \ float norm1 = DOT3(v1, v1); \ float norm2 = DOT3(v2, v2); \ dp /= sqrt(norm1 * norm2); \ dp = min(dp, 1.0f); \ dp = max(dp, -1.0f); \ } #define GETANGLEBETWEENTWOVECTORS(v1, v2, angle) \ { \ float dp; \ GETNORMEDDOTPRODUCT(v1, v2, dp); \ if (dp > 0.99f || dp < -0.99f) { \ float3 cross = CROSS_PRODUCT(v1, v2); \ float scale = DOT3(v1, v1)*DOT3(v2, v2); \ angle = asin(sqrt(DOT3(cross, cross)/scale)); \ if (dp < 0.0f) \ angle = LOCAL_HACK_PI-angle; \ } \ else { \ angle = acos(dp); \ } \ } #define GETDIHEDRALANGLEBETWEENTHREEVECTORS(vector1, vector2, vector3, signVector, cp0, cp1, angle) \ { \ cp0 = CROSS_PRODUCT(vector1, vector2); \ cp1 = CROSS_PRODUCT(vector2, vector3); \ GETANGLEBETWEENTWOVECTORS(cp0, cp1, angle); \ float dp = DOT3(signVector, cp1); \ angle = (dp >= 0) ? angle : -angle; \ } __global__ #if (__CUDA_ARCH__ >= 200) __launch_bounds__(GF1XX_LOCALFORCES_THREADS_PER_BLOCK, 1) #elif (__CUDA_ARCH__ >= 120) __launch_bounds__(GT2XX_LOCALFORCES_THREADS_PER_BLOCK, 1) #else __launch_bounds__(G8X_LOCALFORCES_THREADS_PER_BLOCK, 1) #endif void kCalculateCMAPTorsionForces_kernel(int numAtoms, int numTorsions, float4* forceBuffers, float* energyBuffer, float4* posq, float4* coeff, int2* mapPositions, int4* indices, int* maps) { const float PI = 3.14159265358979323846f; float energy = 0.0f; for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numTorsions; index += blockDim.x*gridDim.x) { int4 atoms1 = indices[4*index]; int4 atoms2 = indices[4*index+1]; int4 atoms3 = indices[4*index+2]; int4 atoms4 = indices[4*index+3]; float4 a1 = posq[atoms1.x]; float4 a2 = posq[atoms1.y]; float4 a3 = posq[atoms1.z]; float4 a4 = posq[atoms1.w]; float4 b1 = posq[atoms2.x]; float4 b2 = posq[atoms2.y]; float4 b3 = posq[atoms2.z]; float4 b4 = posq[atoms2.w]; // Compute the first angle. float3 v0a = make_float3(a1.x-a2.x, a1.y-a2.y, a1.z-a2.z); float3 v1a = make_float3(a3.x-a2.x, a3.y-a2.y, a3.z-a2.z); float3 v2a = make_float3(a3.x-a4.x, a3.y-a4.y, a3.z-a4.z); float3 cp0a, cp1a; float angleA; GETDIHEDRALANGLEBETWEENTHREEVECTORS(v0a, v1a, v2a, v0a, cp0a, cp1a, angleA); angleA = fmod(angleA+2.0f*PI, 2.0f*PI); // Compute the second angle. float3 v0b = make_float3(b1.x-b2.x, b1.y-b2.y, b1.z-b2.z); float3 v1b = make_float3(b3.x-b2.x, b3.y-b2.y, b3.z-b2.z); float3 v2b = make_float3(b3.x-b4.x, b3.y-b4.y, b3.z-b4.z); float3 cp0b, cp1b; float angleB; GETDIHEDRALANGLEBETWEENTHREEVECTORS(v0b, v1b, v2b, v0b, cp0b, cp1b, angleB); angleB = fmod(angleB+2.0f*PI, 2.0f*PI); // Identify which patch this is in. int2 pos = mapPositions[maps[index]]; int size = pos.y; float delta = 2.0f*PI/size; int s = (int) (angleA/delta); int t = (int) (angleB/delta); float4 c[4]; int coeffIndex = pos.x+4*(s+size*t); c[0] = coeff[coeffIndex]; c[1] = coeff[coeffIndex+1]; c[2] = coeff[coeffIndex+2]; c[3] = coeff[coeffIndex+3]; float da = angleA/delta-s; float db = angleB/delta-t; // Evaluate the spline to determine the energy and gradients. float torsionEnergy = 0.0f; float dEdA = 0.0f; float dEdB = 0.0f; torsionEnergy = da*torsionEnergy + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x; dEdA = db*dEdA + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w; dEdB = da*dEdB + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y; torsionEnergy = da*torsionEnergy + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x; dEdA = db*dEdA + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z; dEdB = da*dEdB + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y; torsionEnergy = da*torsionEnergy + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x; dEdA = db*dEdA + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y; dEdB = da*dEdB + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y; torsionEnergy = da*torsionEnergy + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x; dEdA = db*dEdA + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x; dEdB = da*dEdB + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y; dEdA /= delta; dEdB /= delta; energy += torsionEnergy; // Apply the force to the first torsion. float normCross1 = DOT3(cp0a, cp0a); float normSqrBC = DOT3(v1a, v1a); float normBC = sqrt(normSqrBC); float normCross2 = DOT3(cp1a, cp1a); float dp = 1.0f/normSqrBC; float4 ff = make_float4((-dEdA*normBC)/normCross1, DOT3(v0a, v1a)*dp, DOT3(v2a, v1a)*dp, (dEdA*normBC)/normCross2); float3 internalF0 = make_float3(ff.x*cp0a.x, ff.x*cp0a.y, ff.x*cp0a.z); float3 internalF3 = make_float3(ff.w*cp1a.x, ff.w*cp1a.y, ff.w*cp1a.z); float3 d = make_float3(ff.y*internalF0.x - ff.z*internalF3.x, ff.y*internalF0.y - ff.z*internalF3.y, ff.y*internalF0.z - ff.z*internalF3.z); unsigned int offsetA = atoms1.x+atoms3.x*numAtoms; unsigned int offsetB = atoms1.y+atoms3.y*numAtoms; unsigned int offsetC = atoms1.z+atoms3.z*numAtoms; unsigned int offsetD = atoms1.w+atoms3.w*numAtoms; float4 forceA = forceBuffers[offsetA]; float4 forceB = forceBuffers[offsetB]; float4 forceC = forceBuffers[offsetC]; float4 forceD = forceBuffers[offsetD]; forceA.x += internalF0.x; forceA.y += internalF0.y; forceA.z += internalF0.z; forceB.x += d.x-internalF0.x; forceB.y += d.y-internalF0.y; forceB.z += d.z-internalF0.z; forceC.x += -d.x-internalF3.x; forceC.y += -d.y-internalF3.y; forceC.z += -d.z-internalF3.z; forceD.x += internalF3.x; forceD.y += internalF3.y; forceD.z += internalF3.z; forceBuffers[offsetA] = forceA; forceBuffers[offsetB] = forceB; forceBuffers[offsetC] = forceC; forceBuffers[offsetD] = forceD; // Apply the force to the second torsion. normCross1 = DOT3(cp0b, cp0b); normSqrBC = DOT3(v1b, v1b); normBC = sqrt(normSqrBC); normCross2 = DOT3(cp1b, cp1b); dp = 1.0f/normSqrBC; ff = make_float4((-dEdB*normBC)/normCross1, DOT3(v0b, v1b)*dp, DOT3(v2b, v1b)*dp, (dEdB*normBC)/normCross2); internalF0 = make_float3(ff.x*cp0b.x, ff.x*cp0b.y, ff.x*cp0b.z); internalF3 = make_float3(ff.w*cp1b.x, ff.w*cp1b.y, ff.w*cp1b.z); d = make_float3(ff.y*internalF0.x - ff.z*internalF3.x, ff.y*internalF0.y - ff.z*internalF3.y, ff.y*internalF0.z - ff.z*internalF3.z); offsetA = atoms2.x+atoms4.x*numAtoms; offsetB = atoms2.y+atoms4.y*numAtoms; offsetC = atoms2.z+atoms4.z*numAtoms; offsetD = atoms2.w+atoms4.w*numAtoms; forceA = forceBuffers[offsetA]; forceB = forceBuffers[offsetB]; forceC = forceBuffers[offsetC]; forceD = forceBuffers[offsetD]; forceA.x += internalF0.x; forceA.y += internalF0.y; forceA.z += internalF0.z; forceB.x += d.x-internalF0.x; forceB.y += d.y-internalF0.y; forceB.z += d.z-internalF0.z; forceC.x += -d.x-internalF3.x; forceC.y += -d.y-internalF3.y; forceC.z += -d.z-internalF3.z; forceD.x += internalF3.x; forceD.y += internalF3.y; forceD.z += internalF3.z; forceBuffers[offsetA] = forceA; forceBuffers[offsetB] = forceB; forceBuffers[offsetC] = forceC; forceBuffers[offsetD] = forceD; } energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; } void kCalculateCMAPTorsionForces(gpuContext gpu, CUDAStream& coefficients, CUDAStream& mapPositions, CUDAStream& torsionIndices, CUDAStream& torsionMaps) { kCalculateCMAPTorsionForces_kernel<<sim.blocks, gpu->sim.localForces_threads_per_block>>>(gpu->sim.stride, torsionMaps._length, gpu->sim.pForce4, gpu->sim.pEnergy, gpu->sim.pPosq, coefficients._pDevData, mapPositions._pDevData, torsionIndices._pDevData, torsionMaps._pDevData); LAUNCHERROR("kCalculateCMAPTorsionForces"); }