Commit bc85b9f0 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Extened Free energy plugin to allow cutoffs; cleaned up code and added tests

parent d4441c15
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifndef __Gpu_GBVI_SOFTCORE_AUX_H__
#define __Gpu_GBVI_SOFTCORE_AUX_H__
/**
* This file contains subroutines used in evaluating quantities associated w/ the GB/VI function
*/
__device__ float getGBVI_L( float r, float x, float S )
{
float rInv = 1.0f/r;
float xInv = 1.0f/x;
float xInv2 = xInv*xInv;
float diff2 = (r + S)*(r - S);
return (1.5f*xInv2)*( (0.25f*rInv) - (xInv/3.0f) + (0.125f*diff2*xInv2*rInv) );
}
__device__ float getGBVI_Volume( float r_ij, float R, float S )
{
float upperBound = r_ij + S;
float rdiffS = r_ij - S;
float lowerBound = R > rdiffS ? R : rdiffS;
float L_upper = getGBVI_L( r_ij, upperBound, S );
float L_lower = getGBVI_L( r_ij, lowerBound, S );
float mask = r_ij < (R - S) ? 0.0f : 1.0f;
float addOn = r_ij < (S - R) ? (1.0f/(R*R*R)) : 0.0f;
return (mask*( L_upper - L_lower ) + addOn);
}
__device__ float getGBVI_dL_dr( float r, float x, float S )
{
float rInv = 1.0f/r;
float rInv2 = rInv*rInv;
float xInv = 1.0f/x;
float xInv2 = xInv*xInv;
float xInv3 = xInv2*xInv;
float diff2 = (r + S)*(r - S);
return ( (-1.5f*xInv2*rInv2)*( 0.25f + 0.125f*diff2*xInv2 ) + 0.375f*xInv3*xInv );
//return 0.0f;
}
__device__ float getGBVI_dL_dx( float r, float x, float S )
{
float rInv = 1.0f/r;
float xInv = 1.0f/x;
float xInv2 = xInv*xInv;
float xInv3 = xInv2*xInv;
float diff = (r + S)*(r - S);
return ( (-1.5f*xInv3)*( (0.5f*rInv) - xInv + (0.5f*diff*xInv2*rInv) ));
}
__device__ float getGBVI_dE2( float r, float R, float S, float bornForce )
{
float diff = S - R;
float absDiff = fabsf( S - R );
float dE = getGBVI_dL_dr( r, r+S, S ) + getGBVI_dL_dx( r, r+S, S );
float mask;
float lowerBound;
if( (R > (r - S)) && (absDiff < r) ){
mask = 0.0f;
lowerBound = R;
} else {
mask = 1.0f;
lowerBound = (r - S);
}
dE -= getGBVI_dL_dr( r, lowerBound, S ) + mask*getGBVI_dL_dx( r, lowerBound, S );
dE = (absDiff >= r) && r >= diff ? 0.0f : dE;
dE *= ( (r > 1.0e-08f) ? (bornForce/r) : 0.0f);
return (-dE);
}
__device__ float getGBVIBornForce2( float bornRadius, float R, float bornForce, float gamma )
{
float ratio = (R/bornRadius);
float returnBornForce = bornForce + (3.0f*gamma*ratio*ratio*ratio)/bornRadius; // 'cavity' term
float br2 = bornRadius*bornRadius;
returnBornForce *= (1.0f/3.0f)*br2*br2;
return returnBornForce;
}
#endif // __Gpu_GBVI_SOFTCORE_AUX_H__
......@@ -37,7 +37,22 @@
#include "kCalculateGBVIAux.h"
__global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
#undef TARGET
//#define TARGET 5443
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
#ifdef DEBUG
void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
#else
void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
#endif
{
extern __shared__ Atom sA[];
......@@ -47,8 +62,6 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
// int end = workUnits / gridDim.x;
// int pos = end - (threadIdx.x >> GRIDBITS) - 1;
#ifdef USE_CUTOFF
float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
#endif
......@@ -85,7 +98,7 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
sA[threadIdx.x].r = ar.x;
sA[threadIdx.x].sr = ar.y;
sA[threadIdx.x].bornRadiusScaleFactor = ar.w;
apos.w = 0.0f;
float bSum = 0.0f;
for (unsigned int j = 0; j < GRID; j++)
{
......@@ -98,55 +111,80 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#if defined USE_PERIODIC
if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#if defined USE_CUTOFF
if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr && j != tgx)
#else
if (i < cSim.atoms && x+j < cSim.atoms && j != tgx )
#endif
{
r = sqrt(r2);
if ((j != tgx) )
{
apos.w += psA[j].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[j].sr );
}
bSum += psA[j].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[j].sr );
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 1.0f;
pdE2[tjj].x = sqrt(r2);
pdE2[tjj].y = psA[jIdx].sr;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = 1.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = -1.0f;
}
#endif
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += apos.w;
cSim.pBornSum[offset] += bSum;
#else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = apos.w;
cSim.pBornSum[offset] = bSum;
#endif
}
else // 100% utilization
{
} else {
// Read fixed atom data into registers and GRF
unsigned int j = y + tgx;
unsigned int i = x + tgx;
float4 temp = cSim.pPosq[j];
float4 temp1 = cSim.pGBVIData[j];
float4 apos = cSim.pPosq[i]; // Local atom x, y, z, sum
float4 ar = cSim.pGBVIData[i]; // Local atom vr, sr
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].r = temp1.x;
sA[threadIdx.x].sr = temp1.y;
sA[threadIdx.x].bornRadiusScaleFactor = temp1.w;
sA[threadIdx.x].sum = apos.w = 0.0f;
sA[threadIdx.x].sum = 0.0f;
apos.w = 0.0f;
#ifdef USE_CUTOFF
//unsigned int flags = cSim.pInteractionFlag[pos + (blockIdx.x*workUnits)/gridDim.x];
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0)
{
// No interactions in this block.
}
else if (flags == 0xFFFFFFFF)
//else if (flags )
#endif
{
// Compute all interactions within this block.
......@@ -162,10 +200,10 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#ifdef USE_PERIODIC
#ifdef USE_CUTOFF
if (i < cSim.atoms && y+tj < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#else
if (i < cSim.atoms && y+tj < cSim.atoms )
#endif
{
r = sqrt(r2);
......@@ -175,9 +213,37 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
apos.w += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[tj].sr );
psA[tj].sum += ar.w*getGBVI_Volume( r, psA[tj].r, ar.y );
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 2.0f;
float R = ar.x;
float S = psA[tj].sr;
pdE2[tjj].x = getGBVI_L( r, (r + S), S );
pdE2[tjj].y = -getGBVI_L( r, (r - S), S );
pdE2[tjj].z = -getGBVI_L( r, R, S );
pdE2[tjj].w = (1.0f/(R*R*R));
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = ar.w*getGBVI_Volume( r, psA[jIdx].r, ar.y );
pdE1[tjj].y = ar.w;
pdE1[tjj].z = psA[jIdx].r;
pdE1[tjj].w = -2.0f;
}
#endif
}
tj = (tj - 1) & (GRID - 1);
}
}
#ifdef USE_CUTOFF
else
......@@ -198,14 +264,15 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#ifdef USE_PERIODIC
#ifdef USE_CUTOFF
if (i < cSim.atoms && y+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#else
if (i < cSim.atoms && y+j < cSim.atoms)
#endif
{
r = sqrt(r2);
tempBuffer[threadIdx.x] = ar.w*getGBVI_Volume( r, psA[tj].r, ar.y );
tempBuffer[threadIdx.x] = ar.w*getGBVI_Volume( r, psA[j].r, ar.y );
apos.w += psA[j].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[j].sr );
}
// Sum the terms.
......@@ -226,6 +293,7 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
#endif
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += apos.w;
......@@ -237,6 +305,7 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = sA[threadIdx.x].sum;
#endif
}
pos++;
......
......@@ -57,22 +57,14 @@ struct Atom {
static __constant__ cudaGmxSimulation cSim;
void SetCalculateGBVISoftcoreForces2Sim(gpuContext gpu)
void SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
//(void) fprintf( stderr, "SetCalculateGBVISoftcoreForces2Sim called.\n" );
status = cudaMemcpyToSymbol(cSim, &gpu->gpuContext->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateGBVISoftcoreForces2Sim copy to cSim failed");
}
void GetCalculateGBVISoftcoreForces2Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
#include "kCalculateGBVISoftcoreAux.h"
#include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
......@@ -134,6 +126,8 @@ __global__ void kCalculateGBVISoftcoreForces2a_kernel()
}
#define TARGET 0
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
......@@ -167,78 +161,83 @@ __global__ void kCalculateGBVISoftcoreForces2a_kernel()
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
void kCalculateGBVISoftcoreForces2(gpuContext gpu)
void kCalculateGBVISoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
{
//printf("kCalculateGBVISoftcoreForces2\n");
size_t numWithInteractions;
#if 0
kClearForces(gpu);
(void) fprintf( stderr, "\nkCalculateGBVISoftcoreForces2: cleared force prior loop2\n" ); (void) fflush( stderr );
kCalculateGBVISoftcoreForces2a_kernel<<<gpu->sim.blocks, 384>>>();
(void) fprintf( stderr, "\ncalled kCalculateGBVISoftcoreForces2a\n" ); (void) fflush( stderr );
return;
#endif
gpuContext gpu = freeEnergyGpu->gpuContext;
switch (gpu->sim.nonbondedMethod)
/*fprintf( stderr,"kCalculateGBVISoftcoreForces2 nonbondedMethod=%d bornForce2_blocks=%d bornForce2_threads_per_block=%d\n",
freeEnergyGpu->freeEnergySim.nonbondedMethod,
gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, gpu->psInteractionCount->_pSysData[0] ); fflush( stderr );
*/
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{
case NO_CUTOFF:
case FREE_ENERGY_NO_CUTOFF:
#ifdef DEBUG
int psize = 64;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
for( int ii = 0; ii < 32; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.0f;
pdE1->_pSysData[ii].z = 0.0f;
pdE1->_pSysData[ii].w = 0.0f;
pdE2->_pSysData[ii].x = 0.0f;
pdE2->_pSysData[ii].y = 0.0f;
pdE2->_pSysData[ii].z = 0.0f;
pdE2->_pSysData[ii].w = 0.0f;
}
pdE1->Upload();
pdE2->Upload();
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits);
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateGBVISoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits);
//(void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces warp=%u\n", gpu->bOutputBufferPerWarp ); (void) fflush( stderr );
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
(void) fprintf( stderr, "\nkCalculateGBVISoftcoreForces2: Born radii/force forces:\n" ); (void) fflush( stderr );
gpu->psBornForce->Download();
gpu->psForce4->Download();
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e] Fb[%14.6e %14.6e %14.6e]\n",
ii,
gpu->psBornForce->_pSysStream[0][ii],
gpu->psForce4->_pSysStream[0][ii].x,
gpu->psForce4->_pSysStream[0][ii].y,
gpu->psForce4->_pSysStream[0][ii].z,
gpu->psForce4->_pSysStream[1][ii].x,
gpu->psForce4->_pSysStream[1][ii].y,
gpu->psForce4->_pSysStream[1][ii].z
);
}
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms*2; ii++ ){
(void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e %14.6e]\n",
ii,
gpu->psBornForce->_pSysStream[0][ii],
gpu->psForce4->_pSysStream[0][ii].x,
gpu->psForce4->_pSysStream[0][ii].y,
gpu->psForce4->_pSysStream[0][ii].z,
gpu->psForce4->_pSysStream[0][ii].w
);
}
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
pdE1->Download();
pdE2->Download();
fprintf( stderr, "Pde\n" );
for( int ii = 0; ii < 32; ii++ ){
fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
}
break;
#endif
#undef GBVI_DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
else
kCalculateGBVISoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
break;
case CUTOFF:
numWithInteractions = gpu->psInteractionCount->_pSysData[0];
case FREE_ENERGY_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
else
kCalculateGBVISoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
break;
case PERIODIC:
numWithInteractions = gpu->psInteractionCount->_pSysData[0];
case FREE_ENERGY_PERIODIC:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
else
kCalculateGBVISoftcorePeriodicForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
break;
}
LAUNCHERROR("kCalculateGBVISoftcoreForces2");
}
......@@ -29,19 +29,29 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "kCalculateGBVISoftcoreAux.h"
#include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* This file contains the kernel for evaluating the second stage of GB/VI. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit, unsigned int numWorkUnits)
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_BORNFORCE2_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_BORNFORCE2_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit )
{
//METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF
......@@ -63,11 +73,17 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int
float fb = cSim.pBornForce[i];
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
Atom* psA = &sA[tbx];
sA[threadIdx.x].fx = 0.0f;
sA[threadIdx.x].fy = 0.0f;
sA[threadIdx.x].fz = 0.0f;
float3 af;
sA[threadIdx.x].fx = af.x = 0.0f;
sA[threadIdx.x].fy = af.y = 0.0f;
sA[threadIdx.x].fz = af.z = 0.0f;
af.x = 0.0f;
af.y = 0.0f;
af.z = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
......@@ -82,9 +98,11 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int
for (unsigned int j = (tgx+1)&(GRID-1); j != tgx; j = (j+1)&(GRID-1))
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
......@@ -95,31 +113,43 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int
// Atom I Born forces and sum
float dE = psA[j].bornRadiusScaleFactor*getGBVI_dE2( r, ar.x, psA[j].sr, fb );
#if defined USE_PERIODIC
if (i >= cSim.atoms || x+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
if (i >= cSim.atoms || x+j >= cSim.atoms || (i == (x+j)) || r2 > cSim.nonbondedCutoffSqr)
#else
if(i >= cSim.atoms || x+j >= cSim.atoms || (i == (x+j)) )
#endif
{
dE = 0.0f;
}
#endif
/*
if( i == TARGET ){
pdE1[x+j].x = dE;
pdE1[x+j].y = psA[j].bornRadiusScaleFactor;
pdE1[x+j].z = r;
pdE1[x+j].w = dE1;
}
if( (x+j) == TARGET ){
pdE2[i].x = dE;
pdE2[i].y = psA[j].bornRadiusScaleFactor;
pdE2[i].z = r;
pdE2[i].w = psA[j].sr-ar.x;
}*/
float d = dx * dE;
af.x -= d;
psA[j].fx += d;
d = dy * dE;
af.y -= d;
psA[j].fy += d;
d = dz * dE;
af.z -= d;
psA[j].fz += d;
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
......@@ -181,18 +211,15 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int
float dE = psA[tj].bornRadiusScaleFactor*getGBVI_dE2( r, ar.x, psA[tj].sr, fb );
#if defined USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+tj >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
......@@ -207,18 +234,15 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int
// Atom J Born sum term
dE = ar.w*getGBVI_dE2( r, psA[tj].r, ar.y, psA[tj].fb );
#ifdef USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+tj >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
dx *= dE;
dy *= dE;
dz *= dE;
......@@ -254,18 +278,14 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int
// Interleaved Atom I and J Born Forces and sum components
float dE = psA[j].bornRadiusScaleFactor*getGBVI_dE2( r, ar.x, psA[j].sr, fb );
#if defined USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+j >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
......@@ -280,18 +300,15 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int
// Atom J Born sum term
dE = ar.w*getGBVI_dE2( r, psA[j].r, ar.y, psA[j].fb );
#ifdef USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+j >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
dx *= dE;
dy *= dE;
dz *= dE;
......
......@@ -34,7 +34,7 @@
using namespace std;
#include "gputypes.h"
#include "GpuObcGbsaSoftcore.h"
#include "freeEnergyGpuTypes.h"
struct Atom {
float x;
......@@ -49,38 +49,19 @@ struct Atom {
float fb;
};
struct cudaFreeEnergySimulationObcGbsaSoftcore {
float* pNonPolarScalingFactors;
};
struct cudaFreeEnergySimulationObcGbsaSoftcore gbsaSimObc2;
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaFreeEnergySimulationObcGbsaSoftcore gbsaSimDev;
static __constant__ cudaFreeEnergyGmxSimulation feSimDev;
extern "C"
void SetCalculateObcGbsaSoftcoreForces2Sim(gpuContext gpu)
void SetCalculateObcGbsaSoftcoreForces2Sim( freeEnergyGpuContext freeEnergyGpu )
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
status = cudaMemcpyToSymbol(cSim, &freeEnergyGpu->gpuContext->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreForces2Sim copy to cSim failed");
extern "C"
void SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim( float* nonPolarScalingFactors )
{
cudaError_t status;
gbsaSimObc2.pNonPolarScalingFactors = nonPolarScalingFactors;
status = cudaMemcpyToSymbol(gbsaSimDev, &gbsaSimObc2, sizeof(cudaFreeEnergySimulationObcGbsaSoftcore));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim");
status = cudaMemcpyToSymbol( feSimDev, &freeEnergyGpu->freeEnergySim, sizeof(cudaFreeEnergyGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreForces2Sim copy to feSimDev failed");
//(void) fprintf( stderr, "In SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim\n" );
}
void GetCalculateObcGbsaSoftcoreForces2Sim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
// Include versions of the kernels for N^2 calculations.
......@@ -116,15 +97,14 @@ void GetCalculateObcGbsaSoftcoreForces2Sim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaSoftcoreForces2.h"
void kCalculateObcGbsaSoftcoreForces2(gpuContext gpu)
void kCalculateObcGbsaSoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
{
//printf("kCalculateObcGbsaSoftcoreForces2\n");
//fprintf( stderr, "kCalculateObcGbsaSoftcoreForces2 nonbondedMethod=%d warp=%d\n", gpu->sim.nonbondedMethod, gpu->bOutputBufferPerWarp);
//fprintf( stderr, "kCalculateObcGbsaSoftcoreForces2 nonbondedMethod=%d calling kReduceForces\n", gpu->sim.nonbondedMethod);
//kReduceForces(gpu);
switch (gpu->sim.nonbondedMethod)
gpuContext gpu = freeEnergyGpu->gpuContext;
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{
case NO_CUTOFF:
case FREE_ENERGY_NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
......@@ -132,7 +112,9 @@ void kCalculateObcGbsaSoftcoreForces2(gpuContext gpu)
kCalculateObcGbsaSoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
break;
case CUTOFF:
case FREE_ENERGY_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
......@@ -140,7 +122,9 @@ void kCalculateObcGbsaSoftcoreForces2(gpuContext gpu)
kCalculateObcGbsaSoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
break;
case PERIODIC:
case FREE_ENERGY_PERIODIC:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment