/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2009 Stanford University and the Authors. * * Authors: Scott Le Grand, Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include #include #include #include #include #include //#include using namespace std; #include "gputypes.h" static __constant__ cudaGmxSimulation cSim; void SetSettleSim(gpuContext gpu) { cudaError_t status; status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation)); RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed"); } void GetSettleSim(gpuContext gpu) { cudaError_t status; status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation)); RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed"); } /** * This is based on the setlep FORTRAN routine by Shuichi Miyamoto. See * S. Miyamoto and P. Kollman, J. Comp. Chem., vol 13, no. 8, pp. 952-962 (1992). */ __global__ #if (__CUDA_ARCH__ >= 200) __launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1) #elif (__CUDA_ARCH__ >= 130) __launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1) #else __launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1) #endif void kApplyFirstSettle_kernel() { unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; while (pos < cSim.settleConstraints) { // Load data. int4 atomID = cSim.pSettleID[pos]; float2 params = cSim.pSettleParameter[pos]; float4 apos0 = cSim.pOldPosq[atomID.x]; float4 xp0 = cSim.pPosqP[atomID.x]; float4 apos1 = cSim.pOldPosq[atomID.y]; float4 xp1 = cSim.pPosqP[atomID.y]; float4 apos2 = cSim.pOldPosq[atomID.z]; float4 xp2 = cSim.pPosqP[atomID.z]; float m0 = 1.0f/cSim.pVelm4[atomID.x].w; float m1 = 1.0f/cSim.pVelm4[atomID.y].w; float m2 = 1.0f/cSim.pVelm4[atomID.z].w; // Translate the molecule to the origin to improve numerical precision. float3 center = make_float3(apos0.x, apos0.y, apos0.z); apos0.x -= center.x; apos0.y -= center.y; apos0.z -= center.z; apos1.x -= center.x; apos1.y -= center.y; apos1.z -= center.z; apos2.x -= center.x; apos2.y -= center.y; apos2.z -= center.z; // Apply the SETTLE algorithm. float xb0 = apos1.x-apos0.x; float yb0 = apos1.y-apos0.y; float zb0 = apos1.z-apos0.z; float xc0 = apos2.x-apos0.x; float yc0 = apos2.y-apos0.y; float zc0 = apos2.z-apos0.z; float totalMass = m0+m1+m2; float xcom = ((apos0.x+xp0.x)*m0 + (apos1.x+xp1.x)*m1 + (apos2.x+xp2.x)*m2) / totalMass; float ycom = ((apos0.y+xp0.y)*m0 + (apos1.y+xp1.y)*m1 + (apos2.y+xp2.y)*m2) / totalMass; float zcom = ((apos0.z+xp0.z)*m0 + (apos1.z+xp1.z)*m1 + (apos2.z+xp2.z)*m2) / totalMass; float xa1 = apos0.x + xp0.x - xcom; float ya1 = apos0.y + xp0.y - ycom; float za1 = apos0.z + xp0.z - zcom; float xb1 = apos1.x + xp1.x - xcom; float yb1 = apos1.y + xp1.y - ycom; float zb1 = apos1.z + xp1.z - zcom; float xc1 = apos2.x + xp2.x - xcom; float yc1 = apos2.y + xp2.y - ycom; float zc1 = apos2.z + xp2.z - zcom; float xaksZd = yb0*zc0 - zb0*yc0; float yaksZd = zb0*xc0 - xb0*zc0; float zaksZd = xb0*yc0 - yb0*xc0; float xaksXd = ya1*zaksZd - za1*yaksZd; float yaksXd = za1*xaksZd - xa1*zaksZd; float zaksXd = xa1*yaksZd - ya1*xaksZd; float xaksYd = yaksZd*zaksXd - zaksZd*yaksXd; float yaksYd = zaksZd*xaksXd - xaksZd*zaksXd; float zaksYd = xaksZd*yaksXd - yaksZd*xaksXd; float axlng = sqrt(xaksXd * xaksXd + yaksXd * yaksXd + zaksXd * zaksXd); float aylng = sqrt(xaksYd * xaksYd + yaksYd * yaksYd + zaksYd * zaksYd); float azlng = sqrt(xaksZd * xaksZd + yaksZd * yaksZd + zaksZd * zaksZd); float trns11 = xaksXd / axlng; float trns21 = yaksXd / axlng; float trns31 = zaksXd / axlng; float trns12 = xaksYd / aylng; float trns22 = yaksYd / aylng; float trns32 = zaksYd / aylng; float trns13 = xaksZd / azlng; float trns23 = yaksZd / azlng; float trns33 = zaksZd / azlng; float xb0d = trns11*xb0 + trns21*yb0 + trns31*zb0; float yb0d = trns12*xb0 + trns22*yb0 + trns32*zb0; float xc0d = trns11*xc0 + trns21*yc0 + trns31*zc0; float yc0d = trns12*xc0 + trns22*yc0 + trns32*zc0; float za1d = trns13*xa1 + trns23*ya1 + trns33*za1; float xb1d = trns11*xb1 + trns21*yb1 + trns31*zb1; float yb1d = trns12*xb1 + trns22*yb1 + trns32*zb1; float zb1d = trns13*xb1 + trns23*yb1 + trns33*zb1; float xc1d = trns11*xc1 + trns21*yc1 + trns31*zc1; float yc1d = trns12*xc1 + trns22*yc1 + trns32*zc1; float zc1d = trns13*xc1 + trns23*yc1 + trns33*zc1; // --- Step2 A2' --- float rc = 0.5f*params.y; float rb = sqrt(params.x*params.x-rc*rc); float ra = rb*(m1+m2)/totalMass; rb -= ra; float sinphi = za1d / ra; float cosphi = sqrt(1.0f - sinphi*sinphi); float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi); float cospsi = sqrt(1.0f - sinpsi*sinpsi); float ya2d = ra * cosphi; float xb2d = - rc * cospsi; float yb2d = - rb * cosphi - rc *sinpsi * sinphi; float yc2d = - rb * cosphi + rc *sinpsi * sinphi; float xb2d2 = xb2d * xb2d; float hh2 = 4.0f * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d); float deltx = 2.0f * xb2d + sqrt ( 4.0f * xb2d2 - hh2 + params.y*params.y ); xb2d -= deltx * 0.5f; // --- Step3 al,be,ga --- float alpa = ( xb2d * (xb0d-xc0d) + yb0d * yb2d + yc0d * yc2d ); float beta = ( xb2d * (yc0d-yb0d) + xb0d * yb2d + xc0d * yc2d ); float gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d; float al2be2 = alpa * alpa + beta * beta; float sinthe = ( alpa*gama - beta * sqrt ( al2be2 - gama * gama ) ) / al2be2; // --- Step4 A3' --- float costhe = sqrt (1.0f - sinthe * sinthe ); float xa3d = - ya2d * sinthe; float ya3d = ya2d * costhe; float za3d = za1d; float xb3d = xb2d * costhe - yb2d * sinthe; float yb3d = xb2d * sinthe + yb2d * costhe; float zb3d = zb1d; float xc3d = - xb2d * costhe - yc2d * sinthe; float yc3d = - xb2d * sinthe + yc2d * costhe; float zc3d = zc1d; // --- Step5 A3 --- float xa3 = trns11*xa3d + trns12*ya3d + trns13*za3d; float ya3 = trns21*xa3d + trns22*ya3d + trns23*za3d; float za3 = trns31*xa3d + trns32*ya3d + trns33*za3d; float xb3 = trns11*xb3d + trns12*yb3d + trns13*zb3d; float yb3 = trns21*xb3d + trns22*yb3d + trns23*zb3d; float zb3 = trns31*xb3d + trns32*yb3d + trns33*zb3d; float xc3 = trns11*xc3d + trns12*yc3d + trns13*zc3d; float yc3 = trns21*xc3d + trns22*yc3d + trns23*zc3d; float zc3 = trns31*xc3d + trns32*yc3d + trns33*zc3d; xp0.x = xcom + xa3 - apos0.x; xp0.y = ycom + ya3 - apos0.y; xp0.z = zcom + za3 - apos0.z; xp1.x = xcom + xb3 - apos1.x; xp1.y = ycom + yb3 - apos1.y; xp1.z = zcom + zb3 - apos1.z; xp2.x = xcom + xc3 - apos2.x; xp2.y = ycom + yc3 - apos2.y; xp2.z = zcom + zc3 - apos2.z; cSim.pPosqP[atomID.x] = xp0; cSim.pPosqP[atomID.y] = xp1; cSim.pPosqP[atomID.z] = xp2; pos += blockDim.x * gridDim.x; } } void kApplyFirstSettle(gpuContext gpu) { // printf("kApplyFirstSettle\n"); if (gpu->sim.settleConstraints > 0) { kApplyFirstSettle_kernel<<sim.blocks, gpu->sim.settle_threads_per_block>>>(); LAUNCHERROR("kApplyFirstSettle"); } } __global__ void #if (__CUDA_ARCH__ >= 200) __launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1) #elif (__CUDA_ARCH__ >= 130) __launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1) #else __launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1) #endif kApplySecondSettle_kernel() { unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; while (pos < cSim.settleConstraints) { int4 atomID = cSim.pSettleID[pos]; float2 params = cSim.pSettleParameter[pos]; float4 apos0 = cSim.pOldPosq[atomID.x]; float4 xp0 = cSim.pPosq[atomID.x]; float4 apos1 = cSim.pOldPosq[atomID.y]; float4 xp1 = cSim.pPosq[atomID.y]; float4 apos2 = cSim.pOldPosq[atomID.z]; float4 xp2 = cSim.pPosq[atomID.z]; float m0 = 1.0f/cSim.pVelm4[atomID.x].w; float m1 = 1.0f/cSim.pVelm4[atomID.y].w; float m2 = 1.0f/cSim.pVelm4[atomID.z].w; float xb0 = apos1.x-apos0.x; float yb0 = apos1.y-apos0.y; float zb0 = apos1.z-apos0.z; float xc0 = apos2.x-apos0.x; float yc0 = apos2.y-apos0.y; float zc0 = apos2.z-apos0.z; float totalMass = m0+m1+m2; float xcom = ((apos0.x+xp0.x)*m0 + (apos1.x+xp1.x)*m1 + (apos2.x+xp2.x)*m2) / totalMass; float ycom = ((apos0.y+xp0.y)*m0 + (apos1.y+xp1.y)*m1 + (apos2.y+xp2.y)*m2) / totalMass; float zcom = ((apos0.z+xp0.z)*m0 + (apos1.z+xp1.z)*m1 + (apos2.z+xp2.z)*m2) / totalMass; float xa1 = apos0.x + xp0.x - xcom; float ya1 = apos0.y + xp0.y - ycom; float za1 = apos0.z + xp0.z - zcom; float xb1 = apos1.x + xp1.x - xcom; float yb1 = apos1.y + xp1.y - ycom; float zb1 = apos1.z + xp1.z - zcom; float xc1 = apos2.x + xp2.x - xcom; float yc1 = apos2.y + xp2.y - ycom; float zc1 = apos2.z + xp2.z - zcom; float xaksZd = yb0*zc0 - zb0*yc0; float yaksZd = zb0*xc0 - xb0*zc0; float zaksZd = xb0*yc0 - yb0*xc0; float xaksXd = ya1*zaksZd - za1*yaksZd; float yaksXd = za1*xaksZd - xa1*zaksZd; float zaksXd = xa1*yaksZd - ya1*xaksZd; float xaksYd = yaksZd*zaksXd - zaksZd*yaksXd; float yaksYd = zaksZd*xaksXd - xaksZd*zaksXd; float zaksYd = xaksZd*yaksXd - yaksZd*xaksXd; float axlng = sqrt(xaksXd * xaksXd + yaksXd * yaksXd + zaksXd * zaksXd); float aylng = sqrt(xaksYd * xaksYd + yaksYd * yaksYd + zaksYd * zaksYd); float azlng = sqrt(xaksZd * xaksZd + yaksZd * yaksZd + zaksZd * zaksZd); float trns11 = xaksXd / axlng; float trns21 = yaksXd / axlng; float trns31 = zaksXd / axlng; float trns12 = xaksYd / aylng; float trns22 = yaksYd / aylng; float trns32 = zaksYd / aylng; float trns13 = xaksZd / azlng; float trns23 = yaksZd / azlng; float trns33 = zaksZd / azlng; float xb0d = trns11*xb0 + trns21*yb0 + trns31*zb0; float yb0d = trns12*xb0 + trns22*yb0 + trns32*zb0; float xc0d = trns11*xc0 + trns21*yc0 + trns31*zc0; float yc0d = trns12*xc0 + trns22*yc0 + trns32*zc0; float za1d = trns13*xa1 + trns23*ya1 + trns33*za1; float xb1d = trns11*xb1 + trns21*yb1 + trns31*zb1; float yb1d = trns12*xb1 + trns22*yb1 + trns32*zb1; float zb1d = trns13*xb1 + trns23*yb1 + trns33*zb1; float xc1d = trns11*xc1 + trns21*yc1 + trns31*zc1; float yc1d = trns12*xc1 + trns22*yc1 + trns32*zc1; float zc1d = trns13*xc1 + trns23*yc1 + trns33*zc1; // --- Step2 A2' --- float rc = 0.5f*params.y; float rb = sqrt(params.x*params.x-rc*rc); float ra = rb*(m1+m2)/totalMass; rb -= ra; float sinphi = za1d / ra; float cosphi = sqrt(1.0f - sinphi*sinphi); float sinpsi = ( zb1d - zc1d ) / (2*rc*cosphi); float cospsi = sqrt(1.0f - sinpsi*sinpsi); float ya2d = ra * cosphi; float xb2d = - rc * cospsi; float yb2d = - rb * cosphi - rc *sinpsi * sinphi; float yc2d = - rb * cosphi + rc *sinpsi * sinphi; float xb2d2 = xb2d * xb2d; float hh2 = 4.0f * xb2d2 + (yb2d-yc2d) * (yb2d-yc2d) + (zb1d-zc1d) * (zb1d-zc1d); float deltx = 2.0f * xb2d + sqrt ( 4.0f * xb2d2 - hh2 + params.y*params.y ); xb2d -= deltx * 0.5f; // --- Step3 al,be,ga --- float alpa = ( xb2d * (xb0d-xc0d) + yb0d * yb2d + yc0d * yc2d ); float beta = ( xb2d * (yc0d-yb0d) + xb0d * yb2d + xc0d * yc2d ); float gama = xb0d * yb1d - xb1d * yb0d + xc0d * yc1d - xc1d * yc0d; float al2be2 = alpa * alpa + beta * beta; float sinthe = ( alpa*gama - beta * sqrt ( al2be2 - gama * gama ) ) / al2be2; // --- Step4 A3' --- float costhe = sqrt (1.0f - sinthe * sinthe ); float xa3d = - ya2d * sinthe; float ya3d = ya2d * costhe; float za3d = za1d; float xb3d = xb2d * costhe - yb2d * sinthe; float yb3d = xb2d * sinthe + yb2d * costhe; float zb3d = zb1d; float xc3d = - xb2d * costhe - yc2d * sinthe; float yc3d = - xb2d * sinthe + yc2d * costhe; float zc3d = zc1d; // --- Step5 A3 --- float xa3 = trns11*xa3d + trns12*ya3d + trns13*za3d; float ya3 = trns21*xa3d + trns22*ya3d + trns23*za3d; float za3 = trns31*xa3d + trns32*ya3d + trns33*za3d; float xb3 = trns11*xb3d + trns12*yb3d + trns13*zb3d; float yb3 = trns21*xb3d + trns22*yb3d + trns23*zb3d; float zb3 = trns31*xb3d + trns32*yb3d + trns33*zb3d; float xc3 = trns11*xc3d + trns12*yc3d + trns13*zc3d; float yc3 = trns21*xc3d + trns22*yc3d + trns23*zc3d; float zc3 = trns31*xc3d + trns32*yc3d + trns33*zc3d; xp0.x = xcom + xa3; xp0.y = ycom + ya3; xp0.z = zcom + za3; xp1.x = xcom + xb3; xp1.y = ycom + yb3; xp1.z = zcom + zb3; xp2.x = xcom + xc3; xp2.y = ycom + yc3; xp2.z = zcom + zc3; cSim.pPosq[atomID.x] = xp0; cSim.pPosq[atomID.y] = xp1; cSim.pPosq[atomID.z] = xp2; pos += blockDim.x * gridDim.x; } } void kApplySecondSettle(gpuContext gpu) { // printf("kApplySecondSettle\n"); if (gpu->sim.settleConstraints > 0) { kApplySecondSettle_kernel<<sim.blocks, gpu->sim.settle_threads_per_block>>>(); LAUNCHERROR("kApplySecondSettle"); } }