Commit 7b89b1bf authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Mods to remove requirement that nonbonded force buffer size should be doubled for GB/VI and OBC

parent 67b261f6
......@@ -386,6 +386,8 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces(ContextImpl&
// ---------------------------------------------------------------------------------------
_gpuContext* gpu = data.gpu;
// static int call = 0;
// call++;
// write array, ... address's to board
......@@ -398,7 +400,6 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces(ContextImpl&
}
SetCalculateLocalSoftcoreGpuSim( gpu );
SetCalculateCDLJSoftcoreGpuSim( gpu );
// (void) fprintf( log, "Calling SetCalculateLocalSoftcoreGpuSim\n" ); fflush( stderr );
// flip strides (unsure if this is needed)
......@@ -413,20 +414,20 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces(ContextImpl&
}
}
#endif
}
// calculate nonbonded ixns here, only if implicit solvent is inactive
if ( !getIncludeGBSA() && !getIncludeGBVI() ) {
//kClearForces(gpu);
kCalculateCDLJSoftcoreForces(gpu);
}
// local LJ-14 forces
kCalculateLocalSoftcoreForces(gpu);
//kPrintForces(gpu, "Post kCalculateLocalSoftcoreForces", call );
//kReduceForces(gpu);
//exit(0);
}
double CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeEnergy(ContextImpl& context) {
......@@ -527,6 +528,7 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
_gpuContext* gpu = data.gpu;
int debug = 1;
static int call = 0;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize() was called
......@@ -535,8 +537,6 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
setSim++;
SetCalculateObcGbsaSoftcoreBornSumSim( gpu );
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpu );
//SetCalculateObcGbsaForces2Sim( gpu );
SetCalculateObcGbsaSoftcoreForces2Sim( gpu );
}
......@@ -546,8 +546,11 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
// calculate Born radii and first loop of Obc forces
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1\n", methodName.c_str() );
(void) fflush( stderr );
call++;
if( log ){
(void) fprintf( log, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1\n", methodName.c_str() );
(void) fflush( log );
}
}
kClearBornForces(gpu);
......@@ -555,9 +558,10 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
kReduceObcGbsaBornSum(gpu);
kCalculateCDLJObcGbsaSoftcoreForces1(gpu);
//kPrintForces(gpu, "Post kCalculateCDLJObcGbsaSoftcoreForces1", call );
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kReduceObcGbsaBornForces\n", methodName.c_str() );
(void) fflush( stderr );
(void) fprintf( log, "\n%s: calling kReduceObcGbsaBornForces\n", methodName.c_str() );
(void) fflush( log );
}
// compute Born forces
......@@ -565,13 +569,12 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
kReduceObcGbsaSoftcoreBornForces(gpu);
if( debug && log ){
(void) fprintf( stderr, "\n%s calling kCalculateObcGbsaForces2\n", methodName.c_str() );
(void) fflush( stderr );
(void) fprintf( log, "\n%s calling kCalculateObcGbsaForces2\n", methodName.c_str() );
(void) fflush( log );
}
// second loop of Obc GBSA forces
//kCalculateObcGbsaForces2(gpu);
kCalculateObcGbsaSoftcoreForces2(gpu);
}
......@@ -668,6 +671,7 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces(ContextImpl& conte
_gpuContext* gpu = data.gpu;
int debug = 1;
static int call = 0;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize() was called
......@@ -676,36 +680,40 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces(ContextImpl& conte
setSim++;
SetCalculateGBVISoftcoreBornSumGpuSim( gpu );
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpu );
SetCalculateGBVIForces2Sim( gpu );
SetCalculateGBVISoftcoreForces2Sim( gpu );
}
// required!!
// required?
gpu->bRecalculateBornRadii = true; // fixed
//kClearForces(gpu);
// calculate Born radii and first loop of GB/VI forces
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1 & %s\n", methodName.c_str(),
getQuinticScaling() ? "kReduceGBVIBornSumQuinticScaling" : "kReduceGBVIBornSum" );
(void) fflush( stderr );
call++;
if( log ){
(void) fprintf( log, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1 & %s\n", methodName.c_str(),
getQuinticScaling() ? "kReduceGBVIBornSumQuinticScaling" : "kReduceGBVIBornSum" );
(void) fflush( log );
}
}
kClearBornForces(gpu);
kCalculateGBVISoftcoreBornSum(gpu);
if( getQuinticScaling() ){
//(void) fprintf( stderr, "\n%s: calling kReduceGBVIBornSumQuinticScaling\n", methodName.c_str() ); fflush( stderr );
kReduceGBVIBornSumQuinticScaling(gpu, gpuGBVISoftcore );
} else {
kReduceGBVIBornSum(gpu);
}
kCalculateCDLJObcGbsaSoftcoreForces1(gpu);
//kPrintForces(gpu, "Post kCalculateCDLJObcGbsaSoftcoreForces1", call );
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling %s\n", methodName.c_str(),
(void) fprintf( log, "\n%s: calling %s\n", methodName.c_str(),
getQuinticScaling() ? "kReduceGBVIBornForcesQuinticScaling" : "kReduceObcGbsaBornForces" );
(void) fflush( stderr );
(void) fflush( log );
}
// compute Born forces
......@@ -719,14 +727,13 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces(ContextImpl& conte
}
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kCalculateGBVIForces2\n", methodName.c_str() );
(void) fflush( stderr );
(void) fprintf( log, "\n%s: calling kCalculateGBVIForces2\n", methodName.c_str() );
(void) fflush( log );
}
// second loop of GB/VI forces
kCalculateGBVIForces2(gpu);
//kReduceForces(gpu);
kCalculateGBVISoftcoreForces2(gpu);
}
double CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeEnergy(ContextImpl& context) {
......
......@@ -103,11 +103,18 @@ void SetCalculateGBVISoftcoreBornSumGpuSim( gpuContext gpu);
extern "C"
void SetCalculateGBVISoftcoreSupplementarySim( GpuGBVISoftcore* gpuGBVISoftcore );
extern "C"
void SetCalculateGBVISoftcoreForces2Sim(gpuContext gpu);
extern "C"
void GetCalculateGBVISoftcoreForces2Sim(gpuContext gpu);
// kernel calls to device
extern void kReduceGBVIBornSumQuinticScaling( gpuContext gpu, GpuGBVISoftcore* gpuGBVISoftcore );
extern void kCalculateGBVISoftcoreBornSum( gpuContext gpu );
extern void kReduceGBVIBornForcesQuinticScaling( gpuContext gpu );
extern void kCalculateGBVISoftcoreForces2( gpuContext gpu );
// Obc softcore
......@@ -156,6 +163,7 @@ extern void kCalculateObcGbsaSoftcoreBornSum( gpuContext gpu );
// this method is not needed; the OpenMM version can be used
extern void kCalculateObcGbsaSoftcoreForces2( gpuContext gpu );
extern void kPrintForces( gpuContext gpu, std::string idString, int call );
// shared
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifndef __GpuGBVIAUX_H__
#define __GpuGBVIAUX_H__
/**
* This file contains subroutines used in evaluating quantities associated w/ the GB/VI function
*/
__device__ float getGBVI_L( float r, float x, float S )
{
float rInv = 1.0f/r;
float xInv = 1.0f/x;
float xInv2 = xInv*xInv;
float diff2 = (r + S)*(r - S);
return (1.5f*xInv2)*( (0.25f*rInv) - (xInv/3.0f) + (0.125f*diff2*xInv2*rInv) );
}
__device__ float getGBVI_Volume( float r_ij, float R, float S )
{
float upperBound = r_ij + S;
float rdiffS = r_ij - S;
float lowerBound = R > rdiffS ? R : rdiffS;
float L_upper = getGBVI_L( r_ij, upperBound, S );
float L_lower = getGBVI_L( r_ij, lowerBound, S );
float mask = r_ij < (R - S) ? 0.0f : 1.0f;
float addOn = r_ij < (S - R) ? (1.0f/(R*R*R)) : 0.0f;
return (mask*( L_upper - L_lower ) + addOn);
}
__device__ float getGBVI_dL_dr( float r, float x, float S )
{
float rInv = 1.0f/r;
float rInv2 = rInv*rInv;
float xInv = 1.0f/x;
float xInv2 = xInv*xInv;
float xInv3 = xInv2*xInv;
float diff2 = (r + S)*(r - S);
return ( (-1.5f*xInv2*rInv2)*( 0.25f + 0.125f*diff2*xInv2 ) + 0.375f*xInv3*xInv );
//return 0.0f;
}
__device__ float getGBVI_dL_dx( float r, float x, float S )
{
float rInv = 1.0f/r;
float xInv = 1.0f/x;
float xInv2 = xInv*xInv;
float xInv3 = xInv2*xInv;
float diff = (r + S)*(r - S);
return ( (-1.5f*xInv3)*( (0.5f*rInv) - xInv + (0.5f*diff*xInv2*rInv) ));
}
__device__ float getGBVI_dE2( float r, float R, float S, float bornForce )
{
float diff = S - R;
float absDiff = fabsf( S - R );
float dE = getGBVI_dL_dr( r, r+S, S ) + getGBVI_dL_dx( r, r+S, S );
float mask;
float lowerBound;
if( (R > (r - S)) && (absDiff < r) ){
mask = 0.0f;
lowerBound = R;
} else {
mask = 1.0f;
lowerBound = (r - S);
}
dE -= getGBVI_dL_dr( r, lowerBound, S ) + mask*getGBVI_dL_dx( r, lowerBound, S );
dE = (absDiff >= r) && r >= diff ? 0.0f : dE;
dE *= ( (r > 1.0e-08f) ? (bornForce/r) : 0.0f);
return (-dE);
}
__device__ float getGBVIBornForce2( float bornRadius, float R, float bornForce, float gamma )
{
float ratio = (R/bornRadius);
float returnBornForce = bornForce + (3.0f*gamma*ratio*ratio*ratio)/bornRadius; // 'cavity' term
float br2 = bornRadius*bornRadius;
returnBornForce *= (1.0f/3.0f)*br2*br2;
return returnBornForce;
}
#endif // __GpuGBVIAUX_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using namespace std;
#include "gputypes.h"
#include "cudaKernels.h"
#include "GpuFreeEnergyCudaKernels.h"
struct Atom {
float x;
float y;
float z;
float r;
float sr;
float fx;
float fy;
float fz;
float fb;
};
static __constant__ cudaGmxSimulation cSim;
void SetCalculateGBVISoftcoreForces2Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetCalculateGBVISoftcoreForces2Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
#include "kCalculateGBVISoftcoreAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void kCalculateGBVISoftcoreForces2a_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
if( pos >= cSim.atoms )return;
float4 apos = cSim.pPosq[pos];
float4 ar = cSim.pGBVIData[pos];
float fb = cSim.pBornForce[pos];
unsigned int posJ = 0;
float4 force;
force.x = force.y = force.z = force.w = 0.0f;
while ( posJ < cSim.atoms )
{
float4 aposJ = cSim.pPosq[posJ];
float4 arJ = cSim.pGBVIData[posJ];
float fbJ = cSim.pBornForce[posJ];
float dx = aposJ.x - apos.x;
float dy = aposJ.y - apos.y;
float dz = aposJ.z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float dE = getGBVI_dE2( r, ar.x, arJ.y, fb );
dE = r > 1.0e-08f ? dE : 0.0f;
//dx = dy = dz = 1.0f;
float d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#if 1
dE = getGBVI_dE2( r, arJ.x, ar.y, fbJ );
dE = r > 1.0e-08f ? dE : 0.0f;
d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#endif
posJ += 1;
}
// Write results
cSim.pForce4a[pos] = force;
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
void kCalculateGBVISoftcoreForces2(gpuContext gpu)
{
//printf("kCalculateGBVISoftcoreForces2\n");
size_t numWithInteractions;
#if 0
kClearForces(gpu);
(void) fprintf( stderr, "\nkCalculateGBVISoftcoreForces2: cleared force prior loop2\n" ); (void) fflush( stderr );
kCalculateGBVISoftcoreForces2a_kernel<<<gpu->sim.blocks, 384>>>();
(void) fprintf( stderr, "\ncalled kCalculateGBVISoftcoreForces2a\n" ); (void) fflush( stderr );
return;
#endif
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits);
else
kCalculateGBVISoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits);
//(void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces warp=%u\n", gpu->bOutputBufferPerWarp ); (void) fflush( stderr );
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
(void) fprintf( stderr, "\nkCalculateGBVISoftcoreForces2: Born radii/force forces:\n" ); (void) fflush( stderr );
gpu->psBornForce->Download();
gpu->psForce4->Download();
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e] Fb[%14.6e %14.6e %14.6e]\n",
ii,
gpu->psBornForce->_pSysStream[0][ii],
gpu->psForce4->_pSysStream[0][ii].x,
gpu->psForce4->_pSysStream[0][ii].y,
gpu->psForce4->_pSysStream[0][ii].z,
gpu->psForce4->_pSysStream[1][ii].x,
gpu->psForce4->_pSysStream[1][ii].y,
gpu->psForce4->_pSysStream[1][ii].z
);
}
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms*2; ii++ ){
(void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e %14.6e]\n",
ii,
gpu->psBornForce->_pSysStream[0][ii],
gpu->psForce4->_pSysStream[0][ii].x,
gpu->psForce4->_pSysStream[0][ii].y,
gpu->psForce4->_pSysStream[0][ii].z,
gpu->psForce4->_pSysStream[0][ii].w
);
}
#endif
#undef GBVI_DEBUG
break;
case CUTOFF:
numWithInteractions = gpu->psInteractionCount->_pSysData[0];
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
else
kCalculateGBVISoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
break;
case PERIODIC:
numWithInteractions = gpu->psInteractionCount->_pSysData[0];
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
else
kCalculateGBVISoftcorePeriodicForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
break;
}
LAUNCHERROR("kCalculateGBVISoftcoreForces2");
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit, unsigned int numWorkUnits)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF
float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
#endif
unsigned int lasty = -0xFFFFFFFF;
while (pos < end)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = workUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
x = (x >> 17) << GRIDBITS;
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int i = x + tgx;
float4 apos = cSim.pPosq[i];
float4 ar = cSim.pGBVIData[i];
float fb = cSim.pBornForce[i];
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
Atom* psA = &sA[tbx];
float3 af;
sA[threadIdx.x].fx = af.x = 0.0f;
sA[threadIdx.x].fy = af.y = 0.0f;
sA[threadIdx.x].fz = af.z = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].r = ar.x;
sA[threadIdx.x].sr = ar.y;
sA[threadIdx.x].fb = fb;
for (unsigned int j = (tgx+1)&(GRID-1); j != tgx; j = (j+1)&(GRID-1))
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
// Atom I Born forces and sum
float dE = getGBVI_dE2( r, ar.x, psA[j].sr, fb );
#if defined USE_PERIODIC
if (i >= cSim.atoms || x+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
psA[j].fx += d;
d = dy * dE;
af.y -= d;
psA[j].fy += d;
d = dz * dE;
af.z -= d;
psA[j].fz += d;
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4[offset];
of.x += af.x + sA[threadIdx.x].fx;
of.y += af.y + sA[threadIdx.x].fy;
of.z += af.z + sA[threadIdx.x].fz;
cSim.pForce4[offset] = of;
#else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
of = cSim.pForce4[offset];
of.x += af.x + sA[threadIdx.x].fx;
of.y += af.y + sA[threadIdx.x].fy;
of.z += af.z + sA[threadIdx.x].fz;
of.w = 0.0f;
cSim.pForce4[offset] = of;
#endif
}
else
{
// Read fixed atom data into registers and GRF
if (lasty != y)
{
unsigned int j = y + tgx;
float4 temp = cSim.pPosq[j];
float4 temp1 = cSim.pGBVIData[j];
float fb = cSim.pBornForce[j];
sA[threadIdx.x].fb = fb;
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].r = temp1.x;
sA[threadIdx.x].sr = temp1.y;
}
#ifdef USE_CUTOFF
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0)
{
// No interactions in this block.
}
else if (flags == 0xFFFFFFFF)
#endif
{
// Compute all interactions within this block.
for (unsigned int j = 0; j < GRID; j++)
{
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float dE = getGBVI_dE2( r, ar.x, psA[tj].sr, fb );
#if defined USE_PERIODIC
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
psA[tj].fx += d;
d = dy * dE;
af.y -= d;
psA[tj].fy += d;
d = dz * dE;
af.z -= d;
psA[tj].fz += d;
// Atom J Born sum term
dE = getGBVI_dE2( r, psA[tj].r, ar.y, psA[tj].fb );
#ifdef USE_PERIODIC
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
dx *= dE;
dy *= dE;
dz *= dE;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
af.x -= dx;
af.y -= dy;
af.z -= dz;
tj = (tj + 1) & (GRID - 1);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for (unsigned int j = 0; j < GRID; j++)
{
if ((flags&(1<<j)) != 0)
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
// Interleaved Atom I and J Born Forces and sum components
float dE = getGBVI_dE2( r, ar.x, psA[j].sr, fb );
#if defined USE_PERIODIC
if (i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
tempBuffer[threadIdx.x].x = d;
d = dy * dE;
af.y -= d;
tempBuffer[threadIdx.x].y = d;
d = dz * dE;
af.z -= d;
tempBuffer[threadIdx.x].z = d;
// Atom J Born sum term
dE = getGBVI_dE2( r, psA[j].r, ar.y, psA[j].fb );
#ifdef USE_PERIODIC
if (i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
dx *= dE;
dy *= dE;
dz *= dE;
tempBuffer[threadIdx.x].x += dx;
tempBuffer[threadIdx.x].y += dy;
tempBuffer[threadIdx.x].z += dz;
af.x -= dx;
af.y -= dy;
af.z -= dz;
// Sum the forces on atom j.
if (tgx % 2 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+1].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+1].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+1].z;
}
if (tgx % 4 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+2].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+2].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+2].z;
}
if (tgx % 8 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+4].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+4].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+4].z;
}
if (tgx % 16 == 0)
{
tempBuffer[threadIdx.x].x += tempBuffer[threadIdx.x+8].x;
tempBuffer[threadIdx.x].y += tempBuffer[threadIdx.x+8].y;
tempBuffer[threadIdx.x].z += tempBuffer[threadIdx.x+8].z;
}
if (tgx == 0)
{
psA[j].fx += tempBuffer[threadIdx.x].x + tempBuffer[threadIdx.x+16].x;
psA[j].fy += tempBuffer[threadIdx.x].y + tempBuffer[threadIdx.x+16].y;
psA[j].fz += tempBuffer[threadIdx.x].z + tempBuffer[threadIdx.x+16].z;
}
}
}
}
#endif
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
of = cSim.pForce4[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
cSim.pForce4[offset] = of;
offset = y + tgx + warp*cSim.stride;
of = cSim.pForce4[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
cSim.pForce4[offset] = of;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
of = cSim.pForce4[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
of.w = 0.0f;
cSim.pForce4[offset] = of;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
of = cSim.pForce4[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
cSim.pForce4[offset] = of;
#endif
}
lasty = y;
pos++;
}
}
......@@ -395,3 +395,85 @@ fprintf( stderr, "kCalculateCDLJSoftcoreForces: bOutputBufferPerWarp=%u blks=%u
#endif
}
}
void kPrintForces(gpuContext gpu, std::string idString, int call )
{
// printf("kReduceForces\n");
#define GBVI_DEBUG 4
#if ( GBVI_DEBUG == 4 )
gpu->psBornRadii->Download();
gpu->psObcData->Download();
gpu->psObcChain->Download();
gpu->psBornForce->Download();
gpu->psForce4->Download();
gpu->psPosq4->Download();
int maxPrint = 0;
int nanHit = 0;
int targetIndex = 852;
float maxForce = 3.0e+04;
float maxPosition = 2.0e+02;
for( int ii = 0; ii < gpu->natoms; ii++ ){
int hit = 0;
float dist = sqrtf( gpu->psPosq4->_pSysStream[0][ii].x*gpu->psPosq4->_pSysStream[0][ii].x +
gpu->psPosq4->_pSysStream[0][ii].y*gpu->psPosq4->_pSysStream[0][ii].y +
gpu->psPosq4->_pSysStream[0][ii].z*gpu->psPosq4->_pSysStream[0][ii].z );
if( fabs( gpu->psForce4->_pSysStream[0][ii].x ) > maxForce ||
fabs( gpu->psForce4->_pSysStream[0][ii].y ) > maxForce ||
fabs( gpu->psForce4->_pSysStream[0][ii].z ) > maxForce ||
// gpu->psBornRadii->_pSysStream[0][ii] <= 0.0 ||
dist > maxPosition ||
isnan( gpu->psForce4->_pSysStream[0][ii].x ) ||
isnan( gpu->psForce4->_pSysStream[0][ii].y ) ||
isnan( gpu->psForce4->_pSysStream[0][ii].z ) ){
hit = 1;
} else {
hit = 0;
}
if( ii == targetIndex || ii == (targetIndex+1) || ii == (targetIndex+2) )hit = 1;
if( isnan( gpu->psBornForce->_pSysStream[0][ii] ) ||
isnan( gpu->psBornRadii->_pSysStream[0][ii] ) ||
isnan( gpu->psObcChain->_pSysStream[0][ii] ) ||
isnan( gpu->psForce4->_pSysStream[0][ii].x ) ||
isnan( gpu->psForce4->_pSysStream[0][ii].y ) ||
isnan( gpu->psForce4->_pSysStream[0][ii].z ) ){
hit = 1;
nanHit = 1;
}
//if( hit || ii < maxPrint || ii >= (gpu->natoms - maxPrint) )
if( hit ){
static int firstHit = 1;
if( firstHit ){
firstHit = 0;
(void) fprintf( stderr, "\nkPrintForces: %d [r, scl q] b[r/c/f] f[] x[] Born radii/force (%p %p)\n", call,
gpu->psBornForce, gpu->psBornForce->_pDevStream[0] );
}
(void) fprintf( stderr, "%6d [%8.3f %8.3f %8.3f] b[%13.6e %13.6e %13.6e] f[%13.6e %13.6e %13.6e] x[%13.6e %13.6e %13.6e] %10.3e %s %s %d\n",
ii,
(gpu->psObcData->_pSysStream[0][ii].x + 0.009f),
(gpu->psObcData->_pSysStream[0][ii].y/gpu->psObcData->_pSysStream[0][ii].x),
gpu->psPosq4->_pSysStream[0][ii].w,
gpu->psBornRadii->_pSysStream[0][ii],
gpu->psObcChain->_pSysStream[0][ii],
gpu->psBornForce->_pSysStream[0][ii],
gpu->psForce4->_pSysStream[0][ii].x,
gpu->psForce4->_pSysStream[0][ii].y,
gpu->psForce4->_pSysStream[0][ii].z,
gpu->psPosq4->_pSysStream[0][ii].x,
gpu->psPosq4->_pSysStream[0][ii].y,
gpu->psPosq4->_pSysStream[0][ii].z, dist,
(hit ? "XXXXXXX" : "" ), idString.c_str(), call );
}
}
(void) fflush( stderr );
// if( nanHit )exit(0);
#endif
}
......@@ -135,12 +135,12 @@ __global__ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned
#else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4b[offset];
of = cSim.pForce4[offset];
of.x += af.x + sA[threadIdx.x].fx;
of.y += af.y + sA[threadIdx.x].fy;
of.z += af.z + sA[threadIdx.x].fz;
of.w = 0.0f;
cSim.pForce4b[offset] = of;
cSim.pForce4[offset] = of;
}
else
{
......@@ -400,22 +400,22 @@ __global__ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4b[offset];
of = cSim.pForce4[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
cSim.pForce4b[offset] = of;
cSim.pForce4[offset] = of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
offset = y + tgx + warp*cSim.stride;
#else
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4b[offset];
of = cSim.pForce4[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
cSim.pForce4b[offset] = of;
cSim.pForce4[offset] = of;
}
lasty = y;
pos++;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment