/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2009 Stanford University and the Authors. * * Authors: Scott Le Grand, Peter Eastman * * Contributors: * * * * Permission is hereby granted, free of charge, to any person obtaining a * * copy of this software and associated documentation files (the "Software"), * * to deal in the Software without restriction, including without limitation * * the rights to use, copy, modify, merge, publish, distribute, sublicense, * * and/or sell copies of the Software, and to permit persons to whom the * * Software is furnished to do so, subject to the following conditions: * * * * The above copyright notice and this permission notice shall be included in * * all copies or substantial portions of the Software. * * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * * USE OR OTHER DEALINGS IN THE SOFTWARE. * * -------------------------------------------------------------------------- */ #include #include #include #include #include #include #include using namespace std; #include "gputypes.h" #include "cudaKernels.h" struct Atom { float x; float y; float z; float r; float sr; float fx; float fy; float fz; float fb; }; static __constant__ cudaGmxSimulation cSim; void SetCalculateGBVIForces2Sim(gpuContext gpu) { cudaError_t status; status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation)); RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed"); } void GetCalculateGBVIForces2Sim(gpuContext gpu) { cudaError_t status; status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation)); RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed"); } #include "kCalculateGBVIAux.h" /** * This file contains the kernel for evalauating the second stage of GBSA. It is included * several times in kCalculateGBVIForces2.cu with different #defines to generate * different versions of the kernels. */ __global__ void kCalculateGBVIForces2a_kernel() { unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x); if( pos >= cSim.atoms )return; float4 apos = cSim.pPosq[pos]; float4 ar = cSim.pGBVIData[pos]; float fb = cSim.pBornForce[pos]; unsigned int posJ = 0; float4 force; force.x = force.y = force.z = force.w = 0.0f; while ( posJ < cSim.atoms ) { float4 aposJ = cSim.pPosq[posJ]; float4 arJ = cSim.pGBVIData[posJ]; float fbJ = cSim.pBornForce[posJ]; float dx = aposJ.x - apos.x; float dy = aposJ.y - apos.y; float dz = aposJ.z - apos.z; float r2 = dx * dx + dy * dy + dz * dz; float r = sqrt(r2); float dE = getGBVI_dE2( r, ar.x, arJ.y, fb ); dE = r > 1.0e-08f ? dE : 0.0f; //dx = dy = dz = 1.0f; float d = dx*dE; force.x -= d; d = dy*dE; force.y -= d; d = dz*dE; force.z -= d; #if 1 dE = getGBVI_dE2( r, arJ.x, ar.y, fbJ ); dE = r > 1.0e-08f ? dE : 0.0f; d = dx*dE; force.x -= d; d = dy*dE; force.y -= d; d = dz*dE; force.z -= d; #endif posJ += 1; } // Write results cSim.pForce4[pos] = force; } // Include versions of the kernels for N^2 calculations. #define METHOD_NAME(a, b) a##N2##b #include "kCalculateGBVIForces2.h" #define USE_OUTPUT_BUFFER_PER_WARP #undef METHOD_NAME #define METHOD_NAME(a, b) a##N2ByWarp##b #include "kCalculateGBVIForces2.h" // Include versions of the kernels with cutoffs. #undef METHOD_NAME #undef USE_OUTPUT_BUFFER_PER_WARP #define USE_CUTOFF #define METHOD_NAME(a, b) a##Cutoff##b #include "kCalculateGBVIForces2.h" #define USE_OUTPUT_BUFFER_PER_WARP #undef METHOD_NAME #define METHOD_NAME(a, b) a##CutoffByWarp##b #include "kCalculateGBVIForces2.h" // Include versions of the kernels with periodic boundary conditions. #undef METHOD_NAME #undef USE_OUTPUT_BUFFER_PER_WARP #define USE_PERIODIC #define METHOD_NAME(a, b) a##Periodic##b #include "kCalculateGBVIForces2.h" #define USE_OUTPUT_BUFFER_PER_WARP #undef METHOD_NAME #define METHOD_NAME(a, b) a##PeriodicByWarp##b #include "kCalculateGBVIForces2.h" void kCalculateGBVIForces2(gpuContext gpu) { //printf("kCalculateGBVIForces2\n"); size_t numWithInteractions; #if 0 kClearForces(gpu); (void) fprintf( stderr, "\nkCalculateGBVIForces2: cleared force prior loop2\n" ); (void) fflush( stderr ); kCalculateGBVIForces2a_kernel<<sim.blocks, 384>>>(); (void) fprintf( stderr, "\ncalled kCalculateGBVIForces2a\n" ); (void) fflush( stderr ); return; #endif switch (gpu->sim.nonbondedMethod) { case NO_CUTOFF: if (gpu->bOutputBufferPerWarp) kCalculateGBVIN2ByWarpForces2_kernel<<sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits); else kCalculateGBVIN2Forces2_kernel<<sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits); //(void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces warp=%u\n", gpu->bOutputBufferPerWarp ); (void) fflush( stderr ); #define GBVI_DEBUG 0 #if ( GBVI_DEBUG == 1 ) (void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces:\n" ); (void) fflush( stderr ); gpu->psBornForce->Download(); gpu->psForce4->Download(); for( int ii = 0; ii < gpu->natoms; ii++ ){ (void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e] Fb[%14.6e %14.6e %14.6e]\n", ii, gpu->psBornForce->_pSysStream[0][ii], gpu->psForce4->_pSysStream[0][ii].x, gpu->psForce4->_pSysStream[0][ii].y, gpu->psForce4->_pSysStream[0][ii].z, gpu->psForce4->_pSysStream[1][ii].x, gpu->psForce4->_pSysStream[1][ii].y, gpu->psForce4->_pSysStream[1][ii].z ); } for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms*2; ii++ ){ (void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e %14.6e]\n", ii, gpu->psBornForce->_pSysStream[0][ii], gpu->psForce4->_pSysStream[0][ii].x, gpu->psForce4->_pSysStream[0][ii].y, gpu->psForce4->_pSysStream[0][ii].z, gpu->psForce4->_pSysStream[0][ii].w ); } #endif #undef GBVI_DEBUG break; case CUTOFF: numWithInteractions = gpu->psInteractionCount->_pSysData[0]; if (gpu->bOutputBufferPerWarp) kCalculateGBVICutoffByWarpForces2_kernel<<sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, (sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions); else kCalculateGBVICutoffForces2_kernel<<sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, (sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions); break; case PERIODIC: numWithInteractions = gpu->psInteractionCount->_pSysData[0]; if (gpu->bOutputBufferPerWarp) kCalculateGBVIPeriodicByWarpForces2_kernel<<sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, (sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions); else kCalculateGBVIPeriodicForces2_kernel<<sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, (sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions); break; } LAUNCHERROR("kCalculateGBVIForces2"); }