/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2010 Stanford University and the Authors. * * Authors: Scott Le Grand, Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include #include #include #include #include #include #include using namespace std; #include "gputypes.h" #include "cudatypes.h" static __constant__ cudaGmxSimulation cSim; static __constant__ Expression<256> forceExp; static __constant__ Expression<256> energyExp; #include "kEvaluateExpression.h" void SetCalculateCustomTorsionForcesSim(gpuContext gpu) { cudaError_t status; status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation)); RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed"); } void GetCalculateCustomTorsionForcesSim(gpuContext gpu) { cudaError_t status; status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation)); RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed"); } void SetCustomTorsionForceExpression(const Expression<256>& expression) { cudaError_t status; status = cudaMemcpyToSymbol(forceExp, &expression, sizeof(forceExp)); RTERROR(status, "SetCustomTorsionForceExpression: cudaMemcpyToSymbol failed"); } void SetCustomTorsionEnergyExpression(const Expression<256>& expression) { cudaError_t status; status = cudaMemcpyToSymbol(energyExp, &expression, sizeof(energyExp)); RTERROR(status, "SetCustomTorsionEnergyExpression: cudaMemcpyToSymbol failed"); } void SetCustomTorsionGlobalParams(const vector& paramValues) { cudaError_t status; status = cudaMemcpyToSymbol(globalParams, ¶mValues[0], paramValues.size()*sizeof(float)); RTERROR(status, "SetCustomTorsionGlobalParams: cudaMemcpyToSymbol failed"); } #define LOCAL_HACK_PI 3.1415926535897932384626433832795 #define DOT3(v1, v2) (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z) #define CROSS_PRODUCT(v1, v2) make_float3(v1.y*v2.z - v1.z*v2.y, v1.z*v2.x - v1.x*v2.z, v1.x*v2.y - v1.y*v2.x) #define GETNORMEDDOTPRODUCT(v1, v2, dp) \ { \ dp = DOT3(v1, v2); \ float norm1 = DOT3(v1, v1); \ float norm2 = DOT3(v2, v2); \ dp /= sqrtf(norm1 * norm2); \ dp = min(dp, 1.0f); \ dp = max(dp, -1.0f); \ } #define GETANGLEBETWEENTWOVECTORS(v1, v2, angle) \ { \ float dp; \ GETNORMEDDOTPRODUCT(v1, v2, dp); \ if (dp > 0.99f || dp < -0.99f) { \ float3 cross = CROSS_PRODUCT(v1, v2); \ float scale = DOT3(v1, v1)*DOT3(v2, v2); \ angle = asinf(sqrtf(DOT3(cross, cross)/scale)); \ if (dp < 0.0f) \ angle = LOCAL_HACK_PI-angle; \ } \ else { \ angle = acosf(dp); \ } \ } #define GETDIHEDRALANGLEBETWEENTHREEVECTORS(vector1, vector2, vector3, signVector, cp0, cp1, angle) \ { \ cp0 = CROSS_PRODUCT(vector1, vector2); \ cp1 = CROSS_PRODUCT(vector2, vector3); \ GETANGLEBETWEENTWOVECTORS(cp0, cp1, angle); \ float dp = DOT3(signVector, cp1); \ angle = (dp >= 0) ? angle : -angle; \ } __global__ #if (__CUDA_ARCH__ >= 200) __launch_bounds__(GF1XX_LOCALFORCES_THREADS_PER_BLOCK, 1) #elif (__CUDA_ARCH__ >= 120) __launch_bounds__(GT2XX_LOCALFORCES_THREADS_PER_BLOCK, 1) #else __launch_bounds__(G8X_LOCALFORCES_THREADS_PER_BLOCK, 1) #endif void kCalculateCustomTorsionForces_kernel() { extern __shared__ float stack[]; float* variables = (float*) &stack[cSim.customExpressionStackSize*blockDim.x]; unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; float totalEnergy = 0.0f; while (pos < cSim.customTorsions) { int4 atom = cSim.pCustomTorsionID1[pos]; int4 atom2 = cSim.pCustomTorsionID2[pos]; float4 params = cSim.pCustomTorsionParams[pos]; float4 a1 = cSim.pPosq[atom.x]; float4 a2 = cSim.pPosq[atom.y]; float4 a3 = cSim.pPosq[atom.z]; float4 a4 = cSim.pPosq[atom.w]; float3 v0 = make_float3(a1.x-a2.x, a1.y-a2.y, a1.z-a2.z); float3 v1 = make_float3(a3.x-a2.x, a3.y-a2.y, a3.z-a2.z); float3 v2 = make_float3(a3.x-a4.x, a3.y-a4.y, a3.z-a4.z); float3 cp0, cp1; float dihedralAngle; GETDIHEDRALANGLEBETWEENTHREEVECTORS(v0, v1, v2, v0, cp0, cp1, dihedralAngle); VARIABLE(0) = dihedralAngle; VARIABLE(1) = params.x; VARIABLE(2) = params.y; VARIABLE(3) = params.z; VARIABLE(4) = params.w; float dEdAngle = kEvaluateExpression_kernel(&forceExp, stack, variables); totalEnergy += kEvaluateExpression_kernel(&energyExp, stack, variables); float normBC = sqrtf(DOT3(v1, v1)); float dp = 1.0f / DOT3(v1, v1); float4 ff = make_float4((-dEdAngle*normBC)/DOT3(cp0, cp0), DOT3(v0, v1)*dp, DOT3(v2, v1)*dp, (dEdAngle*normBC)/DOT3(cp1, cp1)); float3 internalF0 = make_float3(ff.x*cp0.x, ff.x*cp0.y, ff.x*cp0.z); float3 internalF3 = make_float3(ff.w*cp1.x, ff.w*cp1.y, ff.w*cp1.z); float3 s = make_float3(ff.y*internalF0.x - ff.z*internalF3.x, ff.y*internalF0.y - ff.z*internalF3.y, ff.y*internalF0.z - ff.z*internalF3.z); unsigned int offsetA = atom.x + atom2.x * cSim.stride; unsigned int offsetB = atom.y + atom2.y * cSim.stride; unsigned int offsetC = atom.z + atom2.z * cSim.stride; unsigned int offsetD = atom.w + atom2.w * cSim.stride; float4 forceA = cSim.pForce4[offsetA]; float4 forceB = cSim.pForce4[offsetB]; float4 forceC = cSim.pForce4[offsetC]; float4 forceD = cSim.pForce4[offsetD]; forceA.x += internalF0.x; forceA.y += internalF0.y; forceA.z += internalF0.z; forceB.x += -internalF0.x + s.x; forceB.y += -internalF0.y + s.y; forceB.z += -internalF0.z + s.z; forceC.x += -internalF3.x - s.x; forceC.y += -internalF3.y - s.y; forceC.z += -internalF3.z - s.z; forceD.x += internalF3.x; forceD.y += internalF3.y; forceD.z += internalF3.z; cSim.pForce4[offsetA] = forceA; cSim.pForce4[offsetB] = forceB; cSim.pForce4[offsetC] = forceC; cSim.pForce4[offsetD] = forceD; pos += blockDim.x * gridDim.x; } cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += totalEnergy; } void kCalculateCustomTorsionForces(gpuContext gpu) { // printf("kCalculateCustomTorsionForces\n"); int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float); int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread; int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64); kCalculateCustomTorsionForces_kernel<<sim.blocks, threads, memoryPerThread*threads>>>(); LAUNCHERROR("kCalculateCustomTorsionForces"); }