Commit 25058a77 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Replaced calculation of Born radii using OBC algorithm w/ Grycuk algorithm

TorsionTorsion grids reordered, if needed, so that first angle is the 'slow' index
Fixes for MonteCarloBarostat 
parent e772769e
......@@ -18,6 +18,7 @@
# ----------------------------------------------------------------------------
#SET(CREATE_SERIALIZABLE_OPENMM_AMOEBA OFF CACHE BOOL "Build verison of OpenMMAmoeba w/ backdoor serialization capability")
#SET(CREATE_SERIALIZABLE_OPENMM_AMOEBA TRUE )
SET(CREATE_SERIALIZABLE_OPENMM_AMOEBA FALSE )
# ----------------------------------------------------------------------------
......
......@@ -41,6 +41,7 @@
namespace OpenMM {
typedef std::vector< std::vector< std::vector<double> > > TorsionTorsionGrid;
typedef std::vector< std::vector< std::vector<float> > > TorsionTorsionGridFloat;
/**
* This class implements the Amoeba torsion-torsion interaction
......@@ -198,6 +199,13 @@ public:
}
}
}
/*
for( unsigned int kk = 0; kk < grid.size(); kk++ ){
for( unsigned int jj = 0; jj < grid[kk].size(); jj++ ){
fprintf( stderr, "xGrid %4d %4d %12.3f %12.3f %15.7e %15.7e\n", kk, jj, grid[kk][jj][0], grid[kk][jj][1], grid[kk][jj][2], grid[kk][jj][2]/4.184 );
}
}
*/
_startValues[0] = _grid[0][0][0];
_startValues[1] = _grid[0][0][1];
......
......@@ -60,6 +60,8 @@ public:
return std::map<std::string, double>(); // This force field doesn't define any parameters.
}
std::vector<std::string> getKernelNames();
static void reorderGrid( const TorsionTorsionGrid& grid, TorsionTorsionGrid& reorderedGrid );
private:
AmoebaTorsionTorsionForce& owner;
Kernel kernel;
......
......@@ -54,6 +54,121 @@ double AmoebaTorsionTorsionForceImpl::calcForcesAndEnergy(ContextImpl& context,
return dynamic_cast<CalcAmoebaTorsionTorsionForceKernel&>(kernel.getImpl()).execute(context, includeForces, includeEnergy);
}
struct IntPair {
unsigned int index1;
unsigned int index2;
};
typedef std::map< double, struct IntPair > Map_Double_IntPair;
typedef Map_Double_IntPair::iterator Map_Double_IntPairI;
typedef Map_Double_IntPair::const_iterator Map_Double_IntPairCI;
typedef std::map< double, Map_Double_IntPair > Map_Double_MapDoubleIntPair;
typedef Map_Double_MapDoubleIntPair::iterator Map_Double_MapDoubleIntPairI;
typedef Map_Double_MapDoubleIntPair::const_iterator Map_Double_MapDoubleIntPairCI;
void AmoebaTorsionTorsionForceImpl::reorderGrid( const TorsionTorsionGrid& grid, TorsionTorsionGrid& reorderedGrid ){
reorderedGrid.resize( grid.size() );
std::vector<Map_Double_IntPair> map_Double_IntPair_Vector( grid.size() );
Map_Double_MapDoubleIntPair mapAngles;
//(void) fprintf( stderr, "AmoebaTorsionTorsionForceImpl::reorder grid\n" );
// (1) set dimensions for reorderd grid
// (2) build map:
// map[angleX][angleY] = <ii, jj> indices
// assume map keys are sorted from least to greatest
for (unsigned int ii = 0; ii < grid.size(); ii++) {
reorderedGrid[ii].resize( grid[ii].size() );
for (unsigned int jj = 0; jj < grid[ii].size(); jj++) {
reorderedGrid[ii][jj].resize( grid[ii][jj].size() );
double angleX = grid[ii][jj][0];
double angleY = grid[ii][jj][1];
if( mapAngles.find( angleX ) == mapAngles.end() ){
if( map_Double_IntPair_Vector[ii].size() > 0 ){
char buffer[1024];
(void) sprintf( buffer, "TorsionTorsion grid reorder: x-angle not set correctly: x=%15.7e y=%15.7e size=%u should be zero; ii/jj indies=%u %u.\n",
angleX, angleY, static_cast<unsigned int>(map_Double_IntPair_Vector[ii].size()), ii, jj );
throw OpenMMException(buffer);
}
mapAngles[angleX] = map_Double_IntPair_Vector[ii];
}
Map_Double_IntPair& map_Double_IntPair = mapAngles[angleX];
if( map_Double_IntPair.find( angleY ) != map_Double_IntPair.end() ){
char buffer[1024];
(void) sprintf( buffer, "TorsionTorsion grid reorder: angle pair found twice: %15.7e %15.7e %u\n", angleX, angleY, static_cast<unsigned int>(map_Double_IntPair.size()) );
throw OpenMMException(buffer);
}
struct IntPair pair;
pair.index1 = ii;
pair.index2 = jj;
map_Double_IntPair[angleY] = pair;
}
}
#if 0
(void) fprintf( stderr, "TorsionTorsion grid reorder map\n" );
for( Map_Double_MapDoubleIntPairCI ii = mapAngles.begin(); ii != mapAngles.end(); ii++ ){
double angleX = ii->first;
Map_Double_IntPair map_Double_IntPair = ii->second;
(void) fprintf( stderr, " %15.7e %u \n", angleX, static_cast<unsigned int>(map_Double_IntPair.size()) );
}
for( Map_Double_MapDoubleIntPairCI ii = mapAngles.begin(); ii != mapAngles.end(); ii++ ){
double angleX = ii->first;
Map_Double_IntPair map_Double_IntPair = ii->second;
for( Map_Double_IntPairCI jj = map_Double_IntPair.begin(); jj != map_Double_IntPair.end(); jj++ ){
double angle = jj->first;
struct IntPair pair = jj->second;
(void) fprintf( stderr, " %15.7e %15.7e %d %d\n", angleX, angle, pair.index1, pair.index2 );
}
}
#endif
// load reordered entries
Map_Double_MapDoubleIntPairCI mapII = mapAngles.begin();
Map_Double_IntPair map_Double_IntPair = mapII->second;
Map_Double_IntPairCI mapJJ = map_Double_IntPair.begin();
for (unsigned int ii = 0; ii < grid.size(); ii++) {
for (unsigned int jj = 0; jj < grid[ii].size(); jj++) {
struct IntPair pair = mapJJ->second;
int index1 = pair.index1;
int index2 = pair.index2;
//(void) fprintf( stderr, " %3d %3d %15.7e %15.7e %3d %3d zzz\n", ii, jj, mapII->first, mapJJ->first, index1, index2 );
for (unsigned int kk = 0; kk < grid[ii][jj].size(); kk++) {
reorderedGrid[ii][jj][kk] = static_cast<float>(grid[index1][index2][kk]);
}
// increment map iterators
mapJJ++;
if( mapJJ == map_Double_IntPair.end() ){
mapII++;
if( mapII == mapAngles.end() ){
if( (jj != (grid[ii].size()-1)) && (ii != (grid.size()-1)) ){
char buffer[1024];
(void) sprintf( buffer, "AmoebaTorsionTorsionForceImpl::reorderGrid: error detected with map iterators.\n" );
throw OpenMMException(buffer);
}
} else {
map_Double_IntPair = mapII->second;
mapJJ = map_Double_IntPair.begin();
}
}
}
}
return;
}
std::vector<std::string> AmoebaTorsionTorsionForceImpl::getKernelNames() {
std::vector<std::string> names;
names.push_back(CalcAmoebaTorsionTorsionForceKernel::Name());
......
......@@ -37,6 +37,7 @@ AmoebaCudaData::AmoebaCudaData( CudaPlatform::PlatformData& data ) : cudaPlatfor
hasAmoebaBonds = false;
hasAmoebaGeneralizedKirkwood = false;
hasAmoebaMultipole = false;
useGrycuk = true;
amoebaGpu = amoebaGpuInit( cudaPlatformData.gpu );
localForceKernel = NULL;
log = NULL;
......@@ -45,6 +46,10 @@ AmoebaCudaData::AmoebaCudaData( CudaPlatform::PlatformData& data ) : cudaPlatfor
applyMultipoleCutoff = 0;
useVdwNeighborList = 0;
multipoleForceCount = 0;
boxDimensions[0] = 0.0;
boxDimensions[1] = 0.0;
boxDimensions[2] = 0.0;
}
AmoebaCudaData::~AmoebaCudaData() {
......@@ -74,6 +79,10 @@ bool AmoebaCudaData::getHasAmoebaMultipole( void ) const {
return hasAmoebaMultipole;
}
int AmoebaCudaData::getUseGrycuk( void ) const {
return useGrycuk;
}
void AmoebaCudaData::setHasAmoebaGeneralizedKirkwood( bool inputHasAmoebaGeneralizedKirkwood ) {
hasAmoebaGeneralizedKirkwood = inputHasAmoebaGeneralizedKirkwood;
}
......@@ -108,6 +117,7 @@ void AmoebaCudaData::setContextImpl( void* inputContextImpl ) {
}
void AmoebaCudaData::initializeGpu( void ) {
if( !gpuInitialized ){
if( getHasAmoebaGeneralizedKirkwood() && !getHasAmoebaMultipole() ){
throw OpenMMException("GK force requires Multipole force\n");
......@@ -117,16 +127,33 @@ void AmoebaCudaData::initializeGpu( void ) {
amoebaGpuBuildThreadBlockWorkList( amoebaGpu );
amoebaGpuBuildVdwExclusionList( amoebaGpu );
amoebaGpuBuildScalingList( amoebaGpu );
amoebaGpuSetConstants( amoebaGpu );
amoebaGpuSetConstants( amoebaGpu, 0 );
gpuInitialized = true;
boxDimensions[0] = amoebaGpu->gpuContext->sim.periodicBoxSizeX;
boxDimensions[1] = amoebaGpu->gpuContext->sim.periodicBoxSizeY;
boxDimensions[2] = amoebaGpu->gpuContext->sim.periodicBoxSizeZ;
gpuInitialized = true;
if( log ){
gpuPrintCudaAmoebaGmxSimulation( amoebaGpu, getLog() );
(void) fprintf( log, "Gpu initialized\n" );
(void) fflush( log );
}
} else {
if( boxDimensions[0] != amoebaGpu->gpuContext->sim.periodicBoxSizeX ||
boxDimensions[1] != amoebaGpu->gpuContext->sim.periodicBoxSizeY ||
boxDimensions[2] != amoebaGpu->gpuContext->sim.periodicBoxSizeZ ){
amoebaGpuSetConstants( amoebaGpu, 1 );
boxDimensions[0] = amoebaGpu->gpuContext->sim.periodicBoxSizeX;
boxDimensions[1] = amoebaGpu->gpuContext->sim.periodicBoxSizeY;
boxDimensions[2] = amoebaGpu->gpuContext->sim.periodicBoxSizeZ;
}
}
return;
}
......
......@@ -76,6 +76,13 @@ public:
*/
bool getHasAmoebaMultipole( void ) const;
/**
* Get use Grycuk flag
*
* @return value of useGrycuk
*/
int getUseGrycuk( void ) const;
/**
* Set value of hasAmoebaGeneralizedKirkwood
*
......@@ -190,11 +197,13 @@ private:
int multipoleForceCount;
int applyMultipoleCutoff;
int useVdwNeighborList;
int useGrycuk;
KernelImpl* localForceKernel;
unsigned int kernelCount;
void* contextImpl;
FILE* log;
bool gpuInitialized;
double boxDimensions[3];
};
......
......@@ -31,6 +31,7 @@
#include "kernels/amoebaCudaKernels.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h"
#include "openmm/internal/AmoebaWcaDispersionForceImpl.h"
#include "openmm/internal/AmoebaTorsionTorsionForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "CudaForceInfo.h"
......@@ -738,22 +739,39 @@ void CudaCalcAmoebaTorsionTorsionForceKernel::initialize(const System& system, c
// torsion-torsion grids
numTorsionTorsionGrids = force.getNumTorsionTorsionGrids();
std::vector< std::vector< std::vector< std::vector<float> > > > floatGrids;
std::vector<TorsionTorsionGridFloat> floatGrids;
floatGrids.resize(numTorsionTorsionGrids);
for (int gridIndex = 0; gridIndex < numTorsionTorsionGrids; gridIndex++) {
const TorsionTorsionGrid& grid = force.getTorsionTorsionGrid( gridIndex );
floatGrids[gridIndex].resize( grid.size() );
// check if grid needs to be reordered: x-angle should be 'slow' index
TorsionTorsionGrid reorderedGrid;
int reorder = 0;
if( grid[0][0][0] != grid[0][1][0] ){
AmoebaTorsionTorsionForceImpl::reorderGrid( grid, reorderedGrid );
reorder = 1;
if( data.getLog() ){
(void) fprintf( data.getLog(), "CudaCalcAmoebaTorsionTorsionForceKernel::initialize: reordering torsion-torsion grid %d.\n", gridIndex );
}
}
for (unsigned int ii = 0; ii < grid.size(); ii++) {
floatGrids[gridIndex][ii].resize( grid[ii].size() );
for (unsigned int jj = 0; jj < grid[ii].size(); jj++) {
floatGrids[gridIndex][ii][jj].resize( grid[ii][jj].size() );
for (unsigned int kk = 0; kk < grid[ii][jj].size(); kk++) {
floatGrids[gridIndex][ii][jj][kk] = static_cast<float>(grid[ii][jj][kk]);
if( reorder ){
for( unsigned int kk = 0; kk < grid[ii][jj].size(); kk++) {
floatGrids[gridIndex][ii][jj][kk] = static_cast<float>(reorderedGrid[ii][jj][kk]);
}
} else {
for( unsigned int kk = 0; kk < grid[ii][jj].size(); kk++) {
floatGrids[gridIndex][ii][jj][kk] = static_cast<float>(grid[ii][jj][kk]);
}
}
}
}
......@@ -788,12 +806,17 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
data.initializeGpu();
// calculate Born radii
// calculate Born radii using either the Grycuk or OBC algorithm if GK is active
if( data.getHasAmoebaGeneralizedKirkwood() ){
kClearBornSum( gpu->gpuContext );
kCalculateObcGbsaBornSum(gpu->gpuContext);
kReduceObcGbsaBornSum(gpu->gpuContext);
if( data.getUseGrycuk() ){
kCalculateAmoebaGrycukBornRadii( gpu );
kReduceGrycukGbsaBornSum( gpu );
} else {
kCalculateObcGbsaBornSum(gpu->gpuContext);
kReduceObcGbsaBornSum(gpu->gpuContext);
}
}
// multipoles
......@@ -990,23 +1013,27 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
zsize = pmeGridDimension[2];
pmeParametersSetBasedOnEwaldErrorTolerance = 0;
}
gpuSetAmoebaPMEParameters(data.getAmoebaGpu(), (float) alpha, xsize, ysize, zsize);
if( data.getLog() ){
(void) fprintf( data.getLog(), "AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d] -",
(void) fprintf( data.getLog(), "AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d]\n",
force.getEwaldErrorTolerance(), force.getCutoffDistance(), alpha, xsize, ysize, zsize );
if( pmeParametersSetBasedOnEwaldErrorTolerance ){
(void) fprintf( data.getLog(), " parameters set based on error tolerance and OpenMM algorithm.\n" );
(void) fprintf( data.getLog(), "Parameters based on error tolerance and OpenMM algorithm.\n" );
} else {
double alphaT;
int xsizeT, ysizeT, zsizeT;
NonbondedForceImpl::calcPMEParameters(system, nb, alphaT, xsizeT, ysizeT, zsizeT);
double impliedTolerance = alpha*force.getCutoffDistance();
impliedTolerance = 0.5*exp( -(impliedTolerance*impliedTolerance) );
(void) fprintf( data.getLog(), " using input parameters implied tolerance=%12.3e;", impliedTolerance );
(void) fprintf( data.getLog(), "Using input parameters implied tolerance=%12.3e;", impliedTolerance );
(void) fprintf( data.getLog(), "OpenMM param: aEwald=%12.3f [%6d %6d %6d]\n", alphaT, xsizeT, ysizeT, zsizeT);
}
(void) fprintf( data.getLog(), "\n" );
(void) fflush( data.getLog() );
}
gpuSetAmoebaPMEParameters(data.getAmoebaGpu(), (float) alpha, xsize, ysize, zsize);
data.setApplyMultipoleCutoff( 1 );
data.cudaPlatformData.nonbondedMethod = PARTICLE_MESH_EWALD;
......@@ -1067,12 +1094,25 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
scale[ii] = static_cast<float>( scalingFactor );
charge[ii] = static_cast<float>( particleCharge );
}
gpuSetAmoebaObcParameters( data.getAmoebaGpu(), static_cast<float>(force.getSoluteDielectric() ),
static_cast<float>( force.getSolventDielectric() ),
static_cast<float>( force.getDielectricOffset() ), radius, scale, charge,
force.getIncludeCavityTerm(),
static_cast<float>( force.getProbeRadius() ),
static_cast<float>( force.getSurfaceAreaFactor() ) );
if( data.getUseGrycuk() ){
gpuSetAmoebaGrycukParameters( data.getAmoebaGpu(), static_cast<float>(force.getSoluteDielectric() ),
static_cast<float>( force.getSolventDielectric() ),
static_cast<float>( force.getDielectricOffset() ), radius, scale, charge,
force.getIncludeCavityTerm(),
static_cast<float>( force.getProbeRadius() ),
static_cast<float>( force.getSurfaceAreaFactor() ) );
} else {
gpuSetAmoebaObcParameters( data.getAmoebaGpu(), static_cast<float>(force.getSoluteDielectric() ),
static_cast<float>( force.getSolventDielectric() ),
static_cast<float>( force.getDielectricOffset() ), radius, scale, charge,
force.getIncludeCavityTerm(),
static_cast<float>( force.getProbeRadius() ),
static_cast<float>( force.getSurfaceAreaFactor() ) );
}
data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
}
......
......@@ -32,12 +32,16 @@
#ifdef WIN32
#define _USE_MATH_DEFINES /* M_PI */
#endif
#define PARAMETER_PRINT 1
#define MAX_PARAMETER_PRINT 10
#include "openmm/OpenMMException.h"
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
// for some reason, these are not being included w/ cudaKernels.h on Windows
extern void OPENMMCUDA_EXPORT SetCalculateObcGbsaForces2Sim(gpuContext gpu);
//extern void OPENMMCUDA_EXPORT SetCalculateObcGbsaForces2Sim(gpuContext gpu);
extern void OPENMMCUDA_EXPORT SetForcesSim(gpuContext gpu);
#include <cmath>
......@@ -503,20 +507,33 @@ void gpuSetAmoebaBondParameters(amoebaGpuContext amoebaGpu, const std::vector<in
(*psBondID)[i].w = gpu->pOutputBufferCounter[(*psBondID)[i].y]++;
(*psBondParameter)[i].x = length[i];
(*psBondParameter)[i].y = k[i];
}
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( amoebaGpu->log && (i < DUMP_PARAMETERS || i > bonds - (DUMP_PARAMETERS + 1) ) )
fprintf( amoebaGpu->log, "Bonds: %5d [%5d %5d %5d %5d] L=%15.7e k[%15.7e %15.7e %15.7e] [%5d %5d]\n",
i, (*psBondID)[i].x, (*psBondID)[i].y, (*psBondID)[i].z, (*psBondID)[i].w,
(*psBondParameter)[i].x, (*psBondParameter)[i].y, cubic, quartic,
gpu->pOutputBufferCounter[(*psBondID)[i].x],
gpu->pOutputBufferCounter[(*psBondID)[i].y] );
#endif
#undef DUMP_PARAMETERS
// logging info
if( amoebaGpu->log ){
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = bonds;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaBondParameters: number of bonds=%5d cubicK=%15.7e quarticK=%15.7e\n", bonds, cubic, quartic );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, " %5d [%5d %5d %5d %5d] l=%15.7e k=%15.7e counters: [%5d %5d]\n",
ii, (*psBondID)[ii].x, (*psBondID)[ii].y, (*psBondID)[ii].z, (*psBondID)[ii].w,
(*psBondParameter)[ii].x, (*psBondParameter)[ii].y,
gpu->pOutputBufferCounter[(*psBondID)[ii].x],
gpu->pOutputBufferCounter[(*psBondID)[ii].y] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
(void) fflush( amoebaGpu->log );
}
psBondID->Upload();
psBondParameter->Upload();
}
......@@ -548,19 +565,34 @@ void gpuSetAmoebaUreyBradleyParameters(amoebaGpuContext amoebaGpu, const std::ve
(*psUreyBradleyParameter)[i].x = length[i];
(*psUreyBradleyParameter)[i].y = k[i];
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( amoebaGpu->log && (i < DUMP_PARAMETERS || i > bonds - (DUMP_PARAMETERS + 1) ) )
fprintf( amoebaGpu->log, "UreyBradleys: %5d [%5d %5d %5d %5d] L=%15.7e k[%15.7e %15.7e %15.7e] [%5d %5d]\n",
i, (*psUreyBradleyID)[i].x, (*psUreyBradleyID)[i].y, (*psUreyBradleyID)[i].z, (*psUreyBradleyID)[i].w,
(*psUreyBradleyParameter)[i].x, (*psUreyBradleyParameter)[i].y, cubic, quartic,
gpu->pOutputBufferCounter[(*psUreyBradleyID)[i].x],
gpu->pOutputBufferCounter[(*psUreyBradleyID)[i].y] );
#endif
#undef DUMP_PARAMETERS
}
// logging info
if( amoebaGpu->log ){
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = bonds;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaUreyBradleyParameters: number of bonds=%5d cubicK==%15.7e quarticK=%15.7e\n", bonds, cubic, quartic );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, " %5d [%5d %5d %5d %5d] L=%15.7e k=%15.7e counters: [%5d %5d]\n",
ii, (*psUreyBradleyID)[ii].x, (*psUreyBradleyID)[ii].y, (*psUreyBradleyID)[ii].z, (*psUreyBradleyID)[ii].w,
(*psUreyBradleyParameter)[ii].x, (*psUreyBradleyParameter)[ii].y,
gpu->pOutputBufferCounter[(*psUreyBradleyID)[ii].x],
gpu->pOutputBufferCounter[(*psUreyBradleyID)[ii].y] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
(void) fflush( amoebaGpu->log );
}
psUreyBradleyID->Upload();
psUreyBradleyParameter->Upload();
}
......@@ -602,21 +634,37 @@ void gpuSetAmoebaAngleParameters(amoebaGpuContext amoebaGpu, const std::vector<i
psAngleID1->_pSysData[i].w = gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].x]++;
psAngleID2->_pSysData[i].x = gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].y]++;
psAngleID2->_pSysData[i].y = gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].z]++;
}
// logging info
if( amoebaGpu->log ){
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( (i < DUMP_PARAMETERS || i > bond_angles - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
fprintf( amoebaGpu->log, "Angles: %5d [%5d %5d %5d %5d] [%5d %5d] A=%15.7e k=%15.7e [%5d %5d %5d]\n", i,
(*psAngleID1)[i].x, (*psAngleID1)[i].y, (*psAngleID1)[i].z, (*psAngleID1)[i].w,
(*psAngleID2)[i].x, (*psAngleID2)[i].y,
(*psAngleParameter)[i].x, (*psAngleParameter)[i].y,
gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].x],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].y],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].z] );
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = bond_angles;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaAngleParameters: number of angles=%5d cubicK=%15.7e quarticK=%15.7e penticK=%15.7e sexticK=%15.7e\n",
bond_angles, cubicK, quarticK, penticK, sexticK );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, " %5d [%5d %5d %5d %5d] [%5d %5d] A=%15.7e k=%15.7e [%5d %5d %5d]\n", ii,
(*psAngleID1)[ii].x, (*psAngleID1)[ii].y, (*psAngleID1)[ii].z, (*psAngleID1)[ii].w,
(*psAngleID2)[ii].x, (*psAngleID2)[ii].y,
(*psAngleParameter)[ii].x, (*psAngleParameter)[ii].y,
gpu->pOutputBufferCounter[psAngleID1->_pSysData[ii].x],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[ii].y],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[ii].z] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
#undef DUMP_PARAMETERS
(void) fflush( amoebaGpu->log );
}
psAngleID1->Upload();
psAngleID2->Upload();
psAngleParameter->Upload();
......@@ -662,21 +710,36 @@ void gpuSetAmoebaInPlaneAngleParameters(amoebaGpuContext amoebaGpu, const std::v
psAngleID2->_pSysData[i].y = gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].y]++;
psAngleID2->_pSysData[i].z = gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].z]++;
psAngleID2->_pSysData[i].w = gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].w]++;
}
// logging info
if( amoebaGpu->log ){
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 10
#if (DUMP_PARAMETERS > 0 )
if( (i < DUMP_PARAMETERS || i > bond_angles - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
fprintf( amoebaGpu->log, "InPlaneAngles: %5d [%5d %5d %5d %5d] [%5d %5d %5d %5d] A=%15.7e k=%15.7e [%5d %5d %5d %5d]\n", i,
(*psAngleID1)[i].x, (*psAngleID1)[i].y, (*psAngleID1)[i].z, (*psAngleID1)[i].w,
(*psAngleID2)[i].x, (*psAngleID2)[i].y, (*psAngleID2)[i].z, (*psAngleID2)[i].w,
(*psAngleParameter)[i].x, (*psAngleParameter)[i].y,
gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].x],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].y],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].z],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[i].w] );
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = bond_angles;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaInPlaneAngleParameters: number of angles=%5d cubicK=%15.7e quarticK=%15.7e penticK==%15.7e sexticK=%15.7e\n",
bond_angles, cubicK, quarticK, penticK, sexticK );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, " %5d [%5d %5d %5d %5d] [%5d %5d %5d %5d] A=%15.7e k=%15.7e [%5d %5d %5d %5d]\n", ii,
(*psAngleID1)[ii].x, (*psAngleID1)[ii].y, (*psAngleID1)[ii].z, (*psAngleID1)[ii].w,
(*psAngleID2)[ii].x, (*psAngleID2)[ii].y, (*psAngleID2)[ii].z, (*psAngleID2)[ii].w,
(*psAngleParameter)[ii].x, (*psAngleParameter)[ii].y,
gpu->pOutputBufferCounter[psAngleID1->_pSysData[ii].x],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[ii].y],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[ii].z],
gpu->pOutputBufferCounter[psAngleID1->_pSysData[ii].w] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
#undef DUMP_PARAMETERS
(void) fflush( amoebaGpu->log );
}
psAngleID1->Upload();
......@@ -731,24 +794,37 @@ void gpuSetAmoebaTorsionParameters(amoebaGpuContext amoebaGpu, const std::vector
psTorsionID2->_pSysData[i].y = gpu->pOutputBufferCounter[psTorsionID1->_pSysData[i].y]++;
psTorsionID2->_pSysData[i].z = gpu->pOutputBufferCounter[psTorsionID1->_pSysData[i].z]++;
psTorsionID2->_pSysData[i].w = gpu->pOutputBufferCounter[psTorsionID1->_pSysData[i].w]++;
}
// logging info
if( amoebaGpu->log ){
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( (i < DUMP_PARAMETERS || i > torsions - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
fprintf( amoebaGpu->log, "Torsions: %5d [%5d %5d %5d %5d] [%5d %5d %5d %5d] 0[%15.7e %15.7e] 1[%15.7e %15.7e] 2[%15.7e %15.7e] [%5d %5d %5d %5d]\n", i,
(*psTorsionID1)[i].x, (*psTorsionID1)[i].y, (*psTorsionID1)[i].z, (*psTorsionID1)[i].w,
(*psTorsionID2)[i].x, (*psTorsionID2)[i].y, (*psTorsionID2)[i].z, (*psTorsionID2)[i].w,
(*psTorsionParameter1)[i].x, (*psTorsionParameter1)[i].y, (*psTorsionParameter1)[i].z, (*psTorsionParameter1)[i].w,
(*psTorsionParameter2)[i].x, (*psTorsionParameter2)[i].y,
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[i].x],
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[i].y],
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[i].z],
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[i].w] );
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = torsions;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaTorsionParameters: number of torsions=%5d\n", torsions );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, " %5d [%5d %5d %5d %5d] [%5d %5d %5d %5d] 0[%15.7e %15.7e] 1[%15.7e %15.7e] 2[%15.7e %15.7e] [%5d %5d %5d %5d]\n", ii,
(*psTorsionID1)[ii].x, (*psTorsionID1)[ii].y, (*psTorsionID1)[ii].z, (*psTorsionID1)[ii].w,
(*psTorsionID2)[ii].x, (*psTorsionID2)[ii].y, (*psTorsionID2)[ii].z, (*psTorsionID2)[ii].w,
(*psTorsionParameter1)[ii].x, (*psTorsionParameter1)[ii].y, (*psTorsionParameter1)[ii].z, (*psTorsionParameter1)[ii].w,
(*psTorsionParameter2)[ii].x, (*psTorsionParameter2)[ii].y,
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[ii].x],
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[ii].y],
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[ii].z],
gpu->pOutputBufferCounter[psTorsionID1->_pSysData[ii].w] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
#undef DUMP_PARAMETERS
(void) fflush( amoebaGpu->log );
}
psTorsionID1->Upload();
psTorsionID2->Upload();
psTorsionParameter1->Upload();
......@@ -799,24 +875,38 @@ void gpuSetAmoebaPiTorsionParameters(amoebaGpuContext amoebaGpu, const std::vect
psPiTorsionID3->_pSysData[i].y = gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[i].w]++;
psPiTorsionID3->_pSysData[i].z = gpu->pOutputBufferCounter[psPiTorsionID2->_pSysData[i].x]++;
psPiTorsionID3->_pSysData[i].w = gpu->pOutputBufferCounter[psPiTorsionID2->_pSysData[i].y]++;
}
// logging info
if( amoebaGpu->log ){
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( (i < DUMP_PARAMETERS || i > piTorsions - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
fprintf( amoebaGpu->log, "PiTorsions: %5d [%5d %5d %5d %5d %5d %5d [%5d %5d %5d %5d %5d %5d] k=%15.7e [%5d %5d %5d %5d %5d %5d]\n", i,
(*psPiTorsionID1)[i].x, (*psPiTorsionID1)[i].y, (*psPiTorsionID1)[i].z, (*psPiTorsionID1)[i].w,
(*psPiTorsionID2)[i].x, (*psPiTorsionID2)[i].y, (*psPiTorsionID2)[i].z, (*psPiTorsionID2)[i].w,
(*psPiTorsionID3)[i].x, (*psPiTorsionID3)[i].y, (*psPiTorsionID3)[i].z, (*psPiTorsionID3)[i].w,
(*psPiTorsionParameter)[i],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[i].x],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[i].y],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[i].z],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[i].w],
gpu->pOutputBufferCounter[psPiTorsionID2->_pSysData[i].x],
gpu->pOutputBufferCounter[psPiTorsionID2->_pSysData[i].y] );
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = piTorsions;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaPiTorsionParameters: number of pi torsions=%5d\n", piTorsions );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
fprintf( amoebaGpu->log, "PiTorsions: %5d [%5d %5d %5d %5d %5d %5d [%5d %5d %5d %5d %5d %5d] k=%15.7e [%5d %5d %5d %5d %5d %5d]\n", ii,
(*psPiTorsionID1)[ii].x, (*psPiTorsionID1)[ii].y, (*psPiTorsionID1)[ii].z, (*psPiTorsionID1)[ii].w,
(*psPiTorsionID2)[ii].x, (*psPiTorsionID2)[ii].y, (*psPiTorsionID2)[ii].z, (*psPiTorsionID2)[ii].w,
(*psPiTorsionID3)[ii].x, (*psPiTorsionID3)[ii].y, (*psPiTorsionID3)[ii].z, (*psPiTorsionID3)[ii].w,
(*psPiTorsionParameter)[ii],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[ii].x],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[ii].y],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[ii].z],
gpu->pOutputBufferCounter[psPiTorsionID1->_pSysData[ii].w],
gpu->pOutputBufferCounter[psPiTorsionID2->_pSysData[ii].x],
gpu->pOutputBufferCounter[psPiTorsionID2->_pSysData[ii].y] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
#undef DUMP_PARAMETERS
(void) fflush( amoebaGpu->log );
}
psPiTorsionID1->Upload();
......@@ -859,21 +949,36 @@ void gpuSetAmoebaStretchBendParameters(amoebaGpuContext amoebaGpu, const std::ve
psStretchBendID1->_pSysData[i].w = gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[i].x]++;
psStretchBendID2->_pSysData[i].x = gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[i].y]++;
psStretchBendID2->_pSysData[i].y = gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[i].z]++;
}
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( (i < DUMP_PARAMETERS || i > stretchBends - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
fprintf( amoebaGpu->log, "StretchBends: %5d [%5d %5d %5d] [%5d %5d %5d] [%15.7e %15.7e %15.7e %15.7e [%5d %5d %5d]\n", i,
(*psStretchBendID1)[i].x, (*psStretchBendID1)[i].y, (*psStretchBendID1)[i].z, (*psStretchBendID1)[i].w,
(*psStretchBendID2)[i].x, (*psStretchBendID2)[i].y,
(*psStretchBendParameter)[i].x, (*psStretchBendParameter)[i].y, (*psStretchBendParameter)[i].z, (*psStretchBendParameter)[i].w,
gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[i].x],
gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[i].y],
gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[i].z] );
// logging info
if( amoebaGpu->log ){
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = stretchBends;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaStretchBendParameters: number of stretch bends=%5d\n", stretchBends);
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, " %5d [%5d %5d %5d] [%5d %5d %5d] [%15.7e %15.7e %15.7e %15.7e [%5d %5d %5d]\n", ii,
(*psStretchBendID1)[ii].x, (*psStretchBendID1)[ii].y, (*psStretchBendID1)[ii].z, (*psStretchBendID1)[ii].w,
(*psStretchBendID2)[ii].x, (*psStretchBendID2)[ii].y,
(*psStretchBendParameter)[ii].x, (*psStretchBendParameter)[ii].y, (*psStretchBendParameter)[ii].z, (*psStretchBendParameter)[ii].w,
gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[ii].x],
gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[ii].y],
gpu->pOutputBufferCounter[psStretchBendID1->_pSysData[ii].z] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
#undef DUMP_PARAMETERS
(void) fflush( amoebaGpu->log );
}
psStretchBendID1->Upload();
psStretchBendID2->Upload();
psStretchBendParameter->Upload();
......@@ -906,13 +1011,6 @@ void gpuSetAmoebaOutOfPlaneBendParameters(amoebaGpuContext amoebaGpu, const std:
amoebaGpu->amoebaSim.amoebaOutOfPlaneBendPenticK = penticK;
amoebaGpu->amoebaSim.amoebaOutOfPlaneBendSexticK = sexticK;
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 10
#if (DUMP_PARAMETERS > 0 )
if( amoebaGpu->log )
fprintf( amoebaGpu->log, "OutOfPlaneBends: global ks[%15.7e %15.7e %15.7e %15.7e]\n", cubicK, quarticK, penticK, sexticK );
#endif
for (int i = 0; i < outOfPlaneBends; i++)
{
(*psOutOfPlaneBendID1)[i].x = particles1[i];
......@@ -924,20 +1022,38 @@ void gpuSetAmoebaOutOfPlaneBendParameters(amoebaGpuContext amoebaGpu, const std:
psOutOfPlaneBendID2->_pSysData[i].y = gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[i].y]++;
psOutOfPlaneBendID2->_pSysData[i].z = gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[i].z]++;
psOutOfPlaneBendID2->_pSysData[i].w = gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[i].w]++;
}
// logging info
#if (DUMP_PARAMETERS > 0 )
if( (i < DUMP_PARAMETERS || i > outOfPlaneBends - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
fprintf( amoebaGpu->log, "OutOfPlaneBends: %5d [%5d %5d %5d %5d] [%5d %5d %5d %5d] k=%15.7e [%5d %5d %5d %5d]\n", i,
(*psOutOfPlaneBendID1)[i].x, (*psOutOfPlaneBendID1)[i].y, (*psOutOfPlaneBendID1)[i].z, (*psOutOfPlaneBendID1)[i].w,
(*psOutOfPlaneBendID2)[i].x, (*psOutOfPlaneBendID2)[i].y, (*psOutOfPlaneBendID2)[i].z, (*psOutOfPlaneBendID2)[i].w,
(*psOutOfPlaneBendParameter)[i],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[i].x],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[i].y],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[i].z],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[i].w] );
if( amoebaGpu->log ){
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = outOfPlaneBends;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaOutOfPlaneBendParameters: number of out-of-plane bends=%5d global ks[%15.7e %15.7e %15.7e %15.7e]\n", outOfPlaneBends,
cubicK, quarticK, penticK, sexticK );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, " %5d [%5d %5d %5d %5d] [%5d %5d %5d %5d] k=%15.7e [%5d %5d %5d %5d]\n", ii,
(*psOutOfPlaneBendID1)[ii].x, (*psOutOfPlaneBendID1)[ii].y, (*psOutOfPlaneBendID1)[ii].z, (*psOutOfPlaneBendID1)[ii].w,
(*psOutOfPlaneBendID2)[ii].x, (*psOutOfPlaneBendID2)[ii].y, (*psOutOfPlaneBendID2)[ii].z, (*psOutOfPlaneBendID2)[ii].w,
(*psOutOfPlaneBendParameter)[ii],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[ii].x],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[ii].y],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[ii].z],
gpu->pOutputBufferCounter[psOutOfPlaneBendID1->_pSysData[ii].w] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
#undef DUMP_PARAMETERS
(void) fflush( amoebaGpu->log );
}
psOutOfPlaneBendID1->Upload();
psOutOfPlaneBendID2->Upload();
psOutOfPlaneBendParameter->Upload();
......@@ -980,23 +1096,38 @@ void gpuSetAmoebaTorsionTorsionParameters(amoebaGpuContext amoebaGpu, const std:
psTorsionTorsionID3->_pSysData[i].y = gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[i].z]++;
psTorsionTorsionID3->_pSysData[i].z = gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[i].w]++;
psTorsionTorsionID3->_pSysData[i].w = gpu->pOutputBufferCounter[psTorsionTorsionID2->_pSysData[i].x]++;
}
// logging info
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( (i < DUMP_PARAMETERS || i > torsionTorsions - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
fprintf( amoebaGpu->log, "TorsionTorsions: %5d [%5d %5d %5d %5d %5d] chiral=%5d grid=%5d [%5d %5d %5d %5d %5d] [%5d %5d %5d %5d %5d]\n", i,
(*psTorsionTorsionID1)[i].x, (*psTorsionTorsionID1)[i].y, (*psTorsionTorsionID1)[i].z, (*psTorsionTorsionID1)[i].w,
(*psTorsionTorsionID2)[i].x, (*psTorsionTorsionID2)[i].y, (*psTorsionTorsionID2)[i].z, (*psTorsionTorsionID2)[i].w,
(*psTorsionTorsionID3)[i].x, (*psTorsionTorsionID3)[i].y, (*psTorsionTorsionID3)[i].z, (*psTorsionTorsionID3)[i].w,
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[i].x],
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[i].y],
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[i].z],
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[i].w],
gpu->pOutputBufferCounter[psTorsionTorsionID2->_pSysData[i].x] );
if( amoebaGpu->log ){
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = torsionTorsions;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaTorsionTorsionParameters: number of torsion-torsions=%5d\n", torsionTorsions );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log, "TorsionTorsions: %5d [%5d %5d %5d %5d %5d] chiral=%5d grid=%5d [%5d %5d %5d %5d %5d] [%5d %5d %5d %5d %5d]\n", ii,
(*psTorsionTorsionID1)[ii].x, (*psTorsionTorsionID1)[ii].y, (*psTorsionTorsionID1)[ii].z, (*psTorsionTorsionID1)[ii].w,
(*psTorsionTorsionID2)[ii].x, (*psTorsionTorsionID2)[ii].y, (*psTorsionTorsionID2)[ii].z, (*psTorsionTorsionID2)[ii].w,
(*psTorsionTorsionID3)[ii].x, (*psTorsionTorsionID3)[ii].y, (*psTorsionTorsionID3)[ii].z, (*psTorsionTorsionID3)[ii].w,
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[ii].x],
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[ii].y],
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[ii].z],
gpu->pOutputBufferCounter[psTorsionTorsionID1->_pSysData[ii].w],
gpu->pOutputBufferCounter[psTorsionTorsionID2->_pSysData[ii].x] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
#undef DUMP_PARAMETERS
(void) fflush( amoebaGpu->log );
}
psTorsionTorsionID1->Upload();
psTorsionTorsionID2->Upload();
psTorsionTorsionID3->Upload();
......@@ -1069,7 +1200,7 @@ void gpuSetAmoebaTorsionTorsionGrids(amoebaGpuContext amoebaGpu, const std::vect
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "Grids %u totalGridEntries=%u totalFloat4 entries=%u\n", torsionTorsionGrids, totalGridEntries, totalEntries );
for (unsigned int ii = 0; ii < floatGrids.size(); ii++) {
(void) fprintf( amoebaGpu->log, "Grid %u offset=%d begin=%.3f delta=%.3f Ny=%d\n", ii, amoebaGpu->amoebaSim.amoebaTorTorGridOffset[ii],
(void) fprintf( amoebaGpu->log, "Grid %u offset=%6d begin=%10.3f delta=%10.3f Ny=%3d\n", ii, amoebaGpu->amoebaSim.amoebaTorTorGridOffset[ii],
amoebaGpu->amoebaSim.amoebaTorTorGridBegin[ii], amoebaGpu->amoebaSim.amoebaTorTorGridDelta[ii], amoebaGpu->amoebaSim.amoebaTorTorGridNy[ii]);
}
}
......@@ -1080,25 +1211,43 @@ void gpuSetAmoebaTorsionTorsionGrids(amoebaGpuContext amoebaGpu, const std::vect
for (unsigned int ii = 0; ii < floatGrids.size(); ii++) {
for (unsigned int jj = 0; jj < floatGrids[ii].size(); jj++) {
for (unsigned int kk = 0; kk < floatGrids[ii][jj].size(); kk++) {
(*psTorsionTorsionGrids)[index].x = floatGrids[ii][jj][kk][2];
(*psTorsionTorsionGrids)[index].y = floatGrids[ii][jj][kk][3];
(*psTorsionTorsionGrids)[index].z = floatGrids[ii][jj][kk][4];
(*psTorsionTorsionGrids)[index].w = floatGrids[ii][jj][kk][5];
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 5
#if (DUMP_PARAMETERS > 0 )
if( (index < DUMP_PARAMETERS || index > totalEntries - (DUMP_PARAMETERS + 1)) && amoebaGpu->log )
(void) fprintf( amoebaGpu->log, "TorsionTorsionGrid: %5d %5d [%5d %5d ] [%10.3f %10.3f] [%15.7e %15.7e %15.7e %15.7e]\n", index, ii, jj, kk,
floatGrids[ii][jj][kk][0], floatGrids[ii][jj][kk][1],
(*psTorsionTorsionGrids)[index].x, (*psTorsionTorsionGrids)[index].y, (*psTorsionTorsionGrids)[index].z, (*psTorsionTorsionGrids)[index].w );
#endif
#undef DUMP_PARAMETERS
index++;
}
}
}
// logging info
if( amoebaGpu->log ){
unsigned int maxPrint = MAX_PARAMETER_PRINT;
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaTorsionTorsionGrids: number of grids=%5u\n", static_cast<unsigned int>(floatGrids.size()) );
#ifdef PARAMETER_PRINT
unsigned int index = 0;
for (unsigned int ii = 0; ii < floatGrids.size(); ii++) {
for (unsigned int jj = 0; jj < floatGrids[ii].size(); jj++) {
for (unsigned int kk = 0; kk < floatGrids[ii][jj].size(); kk++) {
if( index < maxPrint || (index > (totalEntries - maxPrint)) ){
(void) fprintf( amoebaGpu->log, " %5d %5d [%5d %5d ] [%10.3f %10.3f] [%15.7e %15.7e %15.7e %15.7e]\n", index, ii, jj, kk,
floatGrids[ii][jj][kk][0], floatGrids[ii][jj][kk][1],
(*psTorsionTorsionGrids)[index].x, (*psTorsionTorsionGrids)[index].y, (*psTorsionTorsionGrids)[index].z, (*psTorsionTorsionGrids)[index].w );
}
index++;
}
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
(void) fflush( amoebaGpu->log );
}
#if 0
std::vector<float> grids;
grids.resize( totalGridEntries );
......@@ -1155,8 +1304,6 @@ if( (index < DUMP_PARAMETERS || index > totalEntries - (DUMP_PARAMETERS + 1)) &&
exit(0);
#endif
psTorsionTorsionGrids->Upload();
}
......@@ -1201,8 +1348,6 @@ void gpuSetAmoebaBondOffsets(amoebaGpuContext amoebaGpu )
amoebaGpu->amoebaSim.amoebaUreyBradley_offset = amoebaGpu->amoebaSim.amoebaTorsionTorsion_offset +
(amoebaGpu->psAmoebaUreyBradleyParameter ? amoebaGpu->psAmoebaUreyBradleyParameter->_stride : 0);
//gpu->sim.localForces_threads_per_block = (std::max(amoebaGpu->amoebaSim.amoebaTorsionTorsion_offset, gpu->sim.customBonds) / gpu->sim.blocks + 15) & 0xfffffff0;
unsigned int maxI = (amoebaGpu->amoebaSim.amoebaUreyBradley_offset > gpu->sim.customBonds) ? amoebaGpu->amoebaSim.amoebaUreyBradley_offset : gpu->sim.customBonds;
gpu->sim.localForces_threads_per_block = (maxI/gpu->sim.blocks + 15) & 0xfffffff0;
if (gpu->sim.localForces_threads_per_block > gpu->sim.max_localForces_threads_per_block)
......@@ -1403,8 +1548,6 @@ static void gpuFixedEFieldAllocate( amoebaGpuContext amoebaGpu )
unsigned int offset = paddedNumberOfAtoms*sizeof( float );
memset( amoebaGpu->psDampingFactorAndThole->_pSysData, 0,2*offset );
//memset( amoebaGpu->psE_Field->_pSysData, 0, offset*3 );
//memset( amoebaGpu->psE_FieldPolar->_pSysData, 0, offset*3 );
}
......@@ -1522,13 +1665,6 @@ void gpuKirkwoodAllocate( amoebaGpuContext amoebaGpu )
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s: paddedNumberOfAtoms =%d\n", methodName.c_str(), paddedNumberOfAtoms );
(void) fflush( amoebaGpu->log );
}
#endif
amoebaGpu->psBorn = new CUDAStream<float>(paddedNumberOfAtoms, 1, "KirkwoodBorn");
amoebaGpu->psBornPolar = new CUDAStream<float>(paddedNumberOfAtoms, 1, "KirkwoodBornPolar");
amoebaGpu->psGk_Field = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "Gk_Fixed_Field");
......@@ -1596,7 +1732,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
unsigned int dipoleIndex = 0;
unsigned int quadrupoleIndex = 0;
unsigned int maxPrint = 5;
unsigned int maxPrint = 10;
if( nonbondedMethod == 0 ){
amoebaGpu->multipoleNonbondedMethod = AMOEBA_NO_CUTOFF;
......@@ -1606,12 +1742,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
throw OpenMM::OpenMMException("multipoleNonbondedMethod not recognized.\n" );
}
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s Nonbonded method=%d %d [NoCutoff=%d PME=%d] polarizationType=%d (0=mutual/1=direct)\n",
methodName.c_str(), nonbondedMethod, amoebaGpu->multipoleNonbondedMethod,
AMOEBA_NO_CUTOFF, AMOEBA_PARTICLE_MESH_EWALD, polarizationType );
(void) fflush( amoebaGpu->log );
}
amoebaGpu->amoebaSim.sqrtPi = std::sqrt( 3.14159265358f );
amoebaGpu->amoebaSim.electric = electricConstant;
amoebaGpu->gpuContext->sim.alphaEwald = alphaEwald;
......@@ -1621,11 +1751,28 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu->amoebaSim.dielec = 1.0f;
}
static const int maxAxisType = 6;
int axisTypeCount[maxAxisType+1] = { 0, 0, 0, 0, 0, 0, 0 };
// logging info
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s\n", methodName.c_str() );
(void) fprintf( amoebaGpu->log," input nonbonded method=%d multipoleNonbondedMethod=%d [NoCutoff=%d PME=%d]\n",
nonbondedMethod, amoebaGpu->multipoleNonbondedMethod, AMOEBA_NO_CUTOFF, AMOEBA_PARTICLE_MESH_EWALD );
(void) fprintf( amoebaGpu->log," polarizationType=%d (0=mutual/1=direct)\n", polarizationType );
(void) fprintf( amoebaGpu->log," maxCovalentDegreeSz=%d minId=%s mutualInducedMaxIterations=%d mutualInducedTargetEpsilon=%15.7e\n",
amoebaGpu->maxCovalentDegreeSz, minId.c_str(), amoebaGpu->mutualInducedMaxIterations, amoebaGpu->mutualInducedTargetEpsilon );
(void) fprintf( amoebaGpu->log," electric=%15.7e alphaEwald=%15.7e nonbondedCutoff=%15.7e dielec=%15.7e\n",
amoebaGpu->amoebaSim.electric, amoebaGpu->gpuContext->sim.alphaEwald, amoebaGpu->gpuContext->sim.nonbondedCutoff, amoebaGpu->amoebaSim.dielec );
(void) fflush( amoebaGpu->log );
}
std::vector<int> axisCount(charges.size(),0);
int maxTorqueBufferIndex = 0;
for( int ii = 0; ii < static_cast<int>(charges.size()); ii++ ){
static const int maxAxisType = 6;
int axisTypeCount[maxAxisType+1] = { 0, 0, 0, 0, 0, 0, 0 };
int maxTorqueBufferIndex = 0;
int chargeSize = static_cast<int>(charges.size());
for( int ii = 0; ii < chargeSize; ii++ ){
// axis type & multipole particles ids
......@@ -1666,10 +1813,15 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
}
if( 0 && amoebaGpu->log ){
fprintf( amoebaGpu->log, "Z1 %4d %d [%4d %4d %4d] dmp/thole %15.7e %15.7e\n", ii, axisType[ii],
multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii], dampingFactors[ii], tholes[ii] );
#ifdef MAX_PARAMETER_PRINT
if( amoebaGpu->log && (ii < maxPrint || (ii > (chargeSize-maxPrint)) ) ){
(void) fprintf( amoebaGpu->log, "%6d axisType=%1d axis=[%6d %6d %6d] dipole[%15.7e %15.7e %15.7e] dmp/thole/polar %15.7e %15.7e %15.7e\n",
ii, axisType[ii],
multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii],
dipoles[dipoleIndex], dipoles[dipoleIndex+1], dipoles[dipoleIndex+2],
dampingFactors[ii], tholes[ii], polarity[ii] );
}
#endif
// charges
......@@ -1748,8 +1900,9 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
}
#ifdef AMOEBA_DEBUG
if( (amoebaGpu->log && ( ( ( ii < maxPrint ) || (ii >= (charges.size() - maxPrint) )) ) ) ){
// logging info
if( 0 && (amoebaGpu->log && ( ( ( ii < maxPrint ) || (ii >= (chargeSize - maxPrint) )) ) ) ){
// axis particles
......@@ -1848,9 +2001,8 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu->psDampingFactorAndThole->_pSysData[ii].x == -std::numeric_limits<double>::infinity()){
(void) fprintf( amoebaGpu->log,"Nan detected at index=%d in psDampingFactor\n", ii );
}
#endif
#ifdef AMOEBA_DEBUG
#if 0
if( amoebaGpu->log ){
// covalent
......@@ -1901,12 +2053,14 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
amoebaGpu->amoebaSim.maxTorqueBufferIndex = maxTorqueBufferIndex;
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "Max axis count=%d\n", maxTorqueBufferIndex );
(void) fprintf( amoebaGpu->log, "%s max axis count=%d\n", methodName.c_str(), maxTorqueBufferIndex );
std::string axisLabel[maxAxisType+1] = { "ZThenX", "Bisector", "ZBisect", "ThreeFold", "ZOnly", "NoAxisType", "Unknown"};
for( unsigned int kk = 0; kk < (maxAxisType+1); kk++ ){
(void) fprintf( amoebaGpu->log, "%2u %10s atom count=%d\n", kk, axisLabel[kk].c_str(), axisTypeCount[kk] );
}
(void) fprintf( amoebaGpu->log, "\n" );
}
// upload
......@@ -1932,7 +2086,6 @@ void gpuSetAmoebaObcParameters( amoebaGpuContext amoebaGpu, float innerDielectri
gpuContext gpu = amoebaGpu->gpuContext;
int paddedNumberOfAtoms = gpu->sim.paddedNumberOfAtoms;
gpu->sim.dielectricOffset = dielectricOffset;
//gpu->bIncludeGBSA = 1;
amoebaGpu->includeObcCavityTerm = includeCavityTerm;
gpu->sim.probeRadius = probeRadius;
gpu->sim.surfaceAreaFactor = surfaceAreaFactor;
......@@ -1961,17 +2114,13 @@ void gpuSetAmoebaObcParameters( amoebaGpuContext amoebaGpu, float innerDielectri
amoebaGpu->amoebaSim.dwater = solventDielectric;
amoebaGpu->amoebaSim.dielec = innerDielectric;
// amoebaGpu->amoebaSim.fc = amoebaGpu->amoebaSim.electric*1.0f*(1.0f-solventDielectric)/(0.0f+1.0f*solventDielectric);;
// amoebaGpu->amoebaSim.fd = amoebaGpu->amoebaSim.electric*2.0f*(1.0f-solventDielectric)/(1.0f+2.0f*solventDielectric);;
// amoebaGpu->amoebaSim.fq = amoebaGpu->amoebaSim.electric*3.0f*(1.0f-solventDielectric)/(2.0f+3.0f*solventDielectric);;
amoebaGpu->amoebaSim.fc = 1.0f*(1.0f-solventDielectric)/(0.0f+1.0f*solventDielectric);;
amoebaGpu->amoebaSim.fd = 2.0f*(1.0f-solventDielectric)/(1.0f+2.0f*solventDielectric);;
amoebaGpu->amoebaSim.fq = 3.0f*(1.0f-solventDielectric)/(2.0f+3.0f*solventDielectric);;
gpu->sim.preFactor = -amoebaGpu->amoebaSim.electric*((1.0f/innerDielectric)-(1.0f/solventDielectric));
if( 0 && amoebaGpu->log ){
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"gpuSetAmoebaObcParameters: cavity=%d dielectricOffset=%15.7e probeRadius=%15.7e surfaceAreaFactor=%15.7e\n",
includeCavityTerm, dielectricOffset, probeRadius, surfaceAreaFactor );
(void) fprintf( amoebaGpu->log," gkc=%12.3f solventDielectric=%15.7e innerDielectric=%15.7e sim.preFactor=%15.7e\n",
......@@ -1993,6 +2142,84 @@ void gpuSetAmoebaObcParameters( amoebaGpuContext amoebaGpu, float innerDielectri
}
extern "C"
void gpuSetAmoebaGrycukParameters( amoebaGpuContext amoebaGpu, float innerDielectric, float solventDielectric, float dielectricOffset,
const std::vector<float>& radius, const std::vector<float>& scale, const std::vector<float>& charge,
int includeCavityTerm, float probeRadius, float surfaceAreaFactor )
{
gpuContext gpu = amoebaGpu->gpuContext;
int paddedNumberOfAtoms = gpu->sim.paddedNumberOfAtoms;
gpu->sim.dielectricOffset = dielectricOffset;
amoebaGpu->includeObcCavityTerm = includeCavityTerm;
gpu->sim.probeRadius = probeRadius;
gpu->sim.surfaceAreaFactor = surfaceAreaFactor;
unsigned int particles = radius.size();
for (unsigned int i = 0; i < particles; i++)
{
(*gpu->psObcData)[i].x = radius[i];
(*gpu->psObcData)[i].y = scale[i]*(*gpu->psObcData)[i].x;
(*gpu->psPosq4)[i].w = charge[i];
}
// Dummy out extra particles data
for (unsigned int i = particles; i < paddedNumberOfAtoms; i++)
{
(*gpu->psBornRadii)[i] = 0.2f;
(*gpu->psObcData)[i].x = 0.01f;
(*gpu->psObcData)[i].y = 0.01f;
}
gpu->psBornRadii->Upload();
gpu->psObcData->Upload();
gpu->psPosq4->Upload();
amoebaGpu->amoebaSim.gkc = 2.455f;
amoebaGpu->amoebaSim.dwater = solventDielectric;
amoebaGpu->amoebaSim.dielec = innerDielectric;
amoebaGpu->amoebaSim.fc = 1.0f*(1.0f-solventDielectric)/(0.0f+1.0f*solventDielectric);;
amoebaGpu->amoebaSim.fd = 2.0f*(1.0f-solventDielectric)/(1.0f+2.0f*solventDielectric);;
amoebaGpu->amoebaSim.fq = 3.0f*(1.0f-solventDielectric)/(2.0f+3.0f*solventDielectric);;
gpu->sim.preFactor = -amoebaGpu->amoebaSim.electric*((1.0f/innerDielectric)-(1.0f/solventDielectric));
// logging info
if( amoebaGpu->log ){
unsigned int maxPrint = MAX_PARAMETER_PRINT;
unsigned int maxIndex = particles;
(void) fprintf( amoebaGpu->log,"gpuSetAmoebaGrycukParameters: cavity=%d dielectricOffset=%15.7e probeRadius=%15.7e surfaceAreaFactor=%15.7e\n",
includeCavityTerm, dielectricOffset, probeRadius, surfaceAreaFactor );
(void) fprintf( amoebaGpu->log," gkc=%12.3f solventDielectric=%15.7e innerDielectric=%15.7e sim.preFactor=%15.7e\n",
amoebaGpu->amoebaSim.gkc, amoebaGpu->amoebaSim.dwater, amoebaGpu->amoebaSim.dielec, gpu->sim.preFactor );
(void) fprintf( amoebaGpu->log," fc=%15.7e fd=%15.7e fq=%15.7e\n",
amoebaGpu->amoebaSim.fc, amoebaGpu->amoebaSim.fq, amoebaGpu->amoebaSim.fq );
(void) fprintf( amoebaGpu->log,"\nRadius scl*radius scl\n" );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < maxIndex; ii++ ){
(void) fprintf( amoebaGpu->log,"%6d %15.7e %15.7e %15.7e\n", ii,
(*gpu->psObcData)[ii].x, (*gpu->psObcData)[ii].y, scale[ii] );
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = maxIndex - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
(void) fflush( amoebaGpu->log );
}
gpuRotationToLabFrameAllocate( amoebaGpu );
gpuFixedEFieldAllocate( amoebaGpu );
gpuElectrostaticAllocate( amoebaGpu );
gpuKirkwoodAllocate( amoebaGpu );
}
static int encodeCell( unsigned int x, unsigned int y ){
return ( (x << 17) | (y << 2) );
}
......@@ -2068,22 +2295,15 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
}
}
if( particles < 1 ){
(void) fprintf( stderr, "%s number of particles\n", methodName );
return;
}
amoebaGpu->psVdwSigmaEpsilon = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "VdwSigmaEpsilon");
for (unsigned int ii = 0; ii < particles; ii++)
{
for( unsigned int ii = 0; ii < particles; ii++ ){
amoebaGpu->psVdwSigmaEpsilon->_pSysData[ii].x = sigmas[ii];
amoebaGpu->psVdwSigmaEpsilon->_pSysData[ii].y = epsilons[ii];
}
// Dummy out extra particles data
for (unsigned int ii = particles; ii < gpu->sim.paddedNumberOfAtoms; ii++)
{
for( unsigned int ii = particles; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
amoebaGpu->psVdwSigmaEpsilon->_pSysData[ii].x = 1.0f;
amoebaGpu->psVdwSigmaEpsilon->_pSysData[ii].y = 0.0f;
}
......@@ -2114,23 +2334,12 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
unsigned int numberOfNonReductions = 0;
unsigned int numberOfReductions = 0;
for (unsigned int ii = 0; ii < particles; ii++)
{
if( ivNonMapping[ii] )
{
for( unsigned int ii = 0; ii < particles; ii++ ){
if( ivNonMapping[ii] ){
numberOfNonReductions++;
}
if( ivMapping[ii].size() > 0 )
{
if( ivMapping[ii].size() > 0 ){
numberOfReductions++;
#ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "Atom %u has %u reductions: [", ii, ivMapping[ii].size() );
for (unsigned int jj = 0; jj < ivMapping[ii].size(); jj++)
{
(void) fprintf( amoebaGpu->log, "%u ", ivMapping[ii][jj] );
}
(void) fprintf( amoebaGpu->log, "] %12.4f\n", reductionFactors[ii] );
#endif
}
}
......@@ -2152,15 +2361,12 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
unsigned int count = 0;
unsigned int nonCount = 0;
for (unsigned int ii = 0; ii < particles; ii++)
{
if( ivNonMapping[ii] )
{
for( unsigned int ii = 0; ii < particles; ii++ ){
if( ivNonMapping[ii] ){
psVdwNonReductionID->_pSysData[nonCount++] = ii;
}
if( ivMapping[ii].size() > 0 )
{
if( ivMapping[ii].size() > 0 ){
psAmoebaVdwReduction->_pSysData[count] = reductionFactors[ii];
psVdwReductionID->_pSysData[count].x = ii;
psVdwReductionID->_pSysData[count].y = ivMapping[ii][0];
......@@ -2186,79 +2392,108 @@ void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
psAmoebaVdwReduction->Upload();
amoebaGpu->vdwExclusions.resize( gpu->natoms );
for( unsigned int ii = 0; ii < gpu->natoms; ii++)
{
for( unsigned int ii = 0; ii < gpu->natoms; ii++ ){
for (unsigned int jj = 0; jj < allExclusions[ii].size(); jj++){
amoebaGpu->vdwExclusions[ii].push_back( allExclusions[ii][jj] );
}
}
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
unsigned int maxPrint = 32;
unsigned int maxPrint = MAX_PARAMETER_PRINT;
(void) fprintf( amoebaGpu->log, "%s sigma/epsilon combining rules=%d %d\n", methodName,
amoebaGpu->vdwSigmaCombiningRule, amoebaGpu->vdwEpsilonCombiningRule);
for (unsigned int ii = 0; ii < gpu->natoms; ii++)
{
(void) fprintf( amoebaGpu->log, "%5u %15.7e %15.7e Ex[", ii, sigmas[ii], epsilons[ii] );
for (unsigned int jj = 0; jj < allExclusions[ii].size(); jj++){
(void) fprintf( amoebaGpu->log, "%d ", allExclusions[ii][jj] );
(void) fprintf( amoebaGpu->log, "%s particles=%d numberOfNonReductions=%d numberOfReductionsi=%d\n",
methodName, particles, numberOfNonReductions, numberOfReductions );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5u %15.7e %15.7e %15.7e Ex[", ii, sigmas[ii], epsilons[ii], reductionFactors[ii] );
for( unsigned int jj = 0; jj < allExclusions[ii].size(); jj++ ){
(void) fprintf( amoebaGpu->log, "%6d ", allExclusions[ii][jj] );
}
(void) fprintf( amoebaGpu->log, "]\n", ii, sigmas[ii], epsilons[ii] );
if( ii == maxPrint && ii < (gpu->sim.paddedNumberOfAtoms - maxPrint) )
{
ii = (gpu->sim.addedNumberOfAtoms - maxPrint);
if( ii == maxPrint ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = (gpu->sim.paddedNumberOfAtoms - maxPrint);
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
(void) fflush( amoebaGpu->log );
}
#endif
#undef AMOEBA_DEBUG
}
extern "C"
void gpuSetAmoebaPMEParameters(amoebaGpuContext amoebaGpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ)
{
gpuContext gpu = amoebaGpu->gpuContext;
gpu->sim.alphaEwald = alpha;
int3 gridSize = make_int3(gridSizeX, gridSizeY, gridSizeZ);
gpu->sim.pmeGridSize = gridSize;
gpuContext gpu = amoebaGpu->gpuContext;
gpu->sim.alphaEwald = alpha;
int3 gridSize = make_int3(gridSizeX, gridSizeY, gridSizeZ);
gpu->sim.pmeGridSize = gridSize;
int3 groupSize = make_int3(2, 4, 4);
gpu->sim.pmeGroupSize = groupSize;
const int3 numGroups = make_int3((gridSize.x+groupSize.x-1)/groupSize.x, (gridSize.y+groupSize.y-1)/groupSize.y, (gridSize.z+groupSize.z-1)/groupSize.z);
const unsigned int totalGroups = numGroups.x*numGroups.y*numGroups.z;
// logging info
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "gpuSetAmoebaPMEParameters alpha=%15.7e grid: [%d %d %d]\n",
gpu->sim.alphaEwald , gridSizeX, gridSizeY, gridSizeZ );
}
const int3 numGroups = make_int3((gridSize.x+groupSize.x-1)/groupSize.x, (gridSize.y+groupSize.y-1)/groupSize.y, (gridSize.z+groupSize.z-1)/groupSize.z);
const unsigned int totalGroups = numGroups.x*numGroups.y*numGroups.z;
cufftPlan3d(&gpu->fftplan, gridSize.x, gridSize.y, gridSize.z, CUFFT_C2C);
gpu->psPmeGrid = new CUDAStream<cufftComplex>(gridSize.x*gridSize.y*gridSize.z, 1, "PmeGrid");
gpu->sim.pPmeGrid = gpu->psPmeGrid->_pDevData;
gpu->psPmeBsplineModuli[0] = new CUDAStream<float>(gridSize.x, 1, "PmeBsplineModuli0");
gpu->sim.pPmeBsplineModuli[0] = gpu->psPmeBsplineModuli[0]->_pDevData;
gpu->psPmeBsplineModuli[1] = new CUDAStream<float>(gridSize.y, 1, "PmeBsplineModuli1");
gpu->sim.pPmeBsplineModuli[1] = gpu->psPmeBsplineModuli[1]->_pDevData;
gpu->psPmeBsplineModuli[2] = new CUDAStream<float>(gridSize.z, 1, "PmeBsplineModuli2");
gpu->sim.pPmeBsplineModuli[2] = gpu->psPmeBsplineModuli[2]->_pDevData;
amoebaGpu->psThetai1 = new CUDAStream<float4>(AMOEBA_PME_ORDER*gpu->natoms, 1, "thetai1");
amoebaGpu->amoebaSim.pThetai1 = amoebaGpu->psThetai1->_pDevData;
amoebaGpu->psThetai2 = new CUDAStream<float4>(AMOEBA_PME_ORDER*gpu->natoms, 1, "thetai2");
amoebaGpu->amoebaSim.pThetai2 = amoebaGpu->psThetai2->_pDevData;
amoebaGpu->psThetai3 = new CUDAStream<float4>(AMOEBA_PME_ORDER*gpu->natoms, 1, "thetai3");
amoebaGpu->amoebaSim.pThetai3 = amoebaGpu->psThetai3->_pDevData;
amoebaGpu->psIgrid = new CUDAStream<int4>(gpu->natoms, 1, "igrid");
amoebaGpu->amoebaSim.pIgrid = amoebaGpu->psIgrid->_pDevData;
amoebaGpu->psPhi = new CUDAStream<float>(20*gpu->natoms, 1, "phi");
amoebaGpu->amoebaSim.pPhi = amoebaGpu->psPhi->_pDevData;
amoebaGpu->psPhid = new CUDAStream<float>(10*gpu->natoms, 1, "phid");
amoebaGpu->amoebaSim.pPhid = amoebaGpu->psPhid->_pDevData;
amoebaGpu->psPhip = new CUDAStream<float>(10*gpu->natoms, 1, "phip");
amoebaGpu->amoebaSim.pPhip = amoebaGpu->psPhip->_pDevData;
amoebaGpu->psPhidp = new CUDAStream<float>(20*gpu->natoms, 1, "phidp");
amoebaGpu->amoebaSim.pPhidp = amoebaGpu->psPhidp->_pDevData;
gpu->psPmeAtomRange = new CUDAStream<int>(gridSize.x*gridSize.y*gridSize.z+1, 1, "PmeAtomRange");
gpu->sim.pPmeAtomRange = gpu->psPmeAtomRange->_pDevData;
gpu->psPmeAtomGridIndex = new CUDAStream<int2>(gpu->natoms, 1, "PmeAtomGridIndex");
gpu->sim.pPmeAtomGridIndex = gpu->psPmeAtomGridIndex->_pDevData;
gpu->psPmeBsplineTheta = new CUDAStream<float4>(1, 1, "PmeBsplineTheta"); // Not actually uesd
gpu->psPmeBsplineDtheta = new CUDAStream<float4>(1, 1, "PmeBsplineDtheta"); // Not actually used
gpu->psPmeGrid = new CUDAStream<cufftComplex>(gridSize.x*gridSize.y*gridSize.z, 1, "PmeGrid");
gpu->sim.pPmeGrid = gpu->psPmeGrid->_pDevData;
gpu->psPmeBsplineModuli[0] = new CUDAStream<float>(gridSize.x, 1, "PmeBsplineModuli0");
gpu->sim.pPmeBsplineModuli[0] = gpu->psPmeBsplineModuli[0]->_pDevData;
gpu->psPmeBsplineModuli[1] = new CUDAStream<float>(gridSize.y, 1, "PmeBsplineModuli1");
gpu->sim.pPmeBsplineModuli[1] = gpu->psPmeBsplineModuli[1]->_pDevData;
gpu->psPmeBsplineModuli[2] = new CUDAStream<float>(gridSize.z, 1, "PmeBsplineModuli2");
gpu->sim.pPmeBsplineModuli[2] = gpu->psPmeBsplineModuli[2]->_pDevData;
amoebaGpu->psThetai1 = new CUDAStream<float4>(AMOEBA_PME_ORDER*gpu->natoms, 1, "thetai1");
amoebaGpu->amoebaSim.pThetai1 = amoebaGpu->psThetai1->_pDevData;
amoebaGpu->psThetai2 = new CUDAStream<float4>(AMOEBA_PME_ORDER*gpu->natoms, 1, "thetai2");
amoebaGpu->amoebaSim.pThetai2 = amoebaGpu->psThetai2->_pDevData;
amoebaGpu->psThetai3 = new CUDAStream<float4>(AMOEBA_PME_ORDER*gpu->natoms, 1, "thetai3");
amoebaGpu->amoebaSim.pThetai3 = amoebaGpu->psThetai3->_pDevData;
amoebaGpu->psIgrid = new CUDAStream<int4>(gpu->natoms, 1, "igrid");
amoebaGpu->amoebaSim.pIgrid = amoebaGpu->psIgrid->_pDevData;
amoebaGpu->psPhi = new CUDAStream<float>(20*gpu->natoms, 1, "phi");
amoebaGpu->amoebaSim.pPhi = amoebaGpu->psPhi->_pDevData;
amoebaGpu->psPhid = new CUDAStream<float>(10*gpu->natoms, 1, "phid");
amoebaGpu->amoebaSim.pPhid = amoebaGpu->psPhid->_pDevData;
amoebaGpu->psPhip = new CUDAStream<float>(10*gpu->natoms, 1, "phip");
amoebaGpu->amoebaSim.pPhip = amoebaGpu->psPhip->_pDevData;
amoebaGpu->psPhidp = new CUDAStream<float>(20*gpu->natoms, 1, "phidp");
amoebaGpu->amoebaSim.pPhidp = amoebaGpu->psPhidp->_pDevData;
gpu->psPmeAtomRange = new CUDAStream<int>(gridSize.x*gridSize.y*gridSize.z+1, 1, "PmeAtomRange");
gpu->sim.pPmeAtomRange = gpu->psPmeAtomRange->_pDevData;
gpu->psPmeAtomGridIndex = new CUDAStream<int2>(gpu->natoms, 1, "PmeAtomGridIndex");
gpu->sim.pPmeAtomGridIndex = gpu->psPmeAtomGridIndex->_pDevData;
gpu->psPmeBsplineTheta = new CUDAStream<float4>(1, 1, "PmeBsplineTheta"); // Not actually uesd
gpu->psPmeBsplineDtheta = new CUDAStream<float4>(1, 1, "PmeBsplineDtheta"); // Not actually used
// Initialize the b-spline moduli.
......@@ -2624,22 +2859,23 @@ void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
amoebaGpu->amoebaSim.shctd = shctd;
amoebaGpu->amoebaSim.dispoff = dispoff;
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
unsigned int maxPrint = 10;
unsigned int maxPrint = MAX_PARAMETER_PRINT;
(void) fprintf( amoebaGpu->log, "%s particles=%u total max dispersion energy=%14.5e eps[%14.5e %14.5e] rmin[%14.5e %14.5e] awtr=%14.5e shctd=%14.5e dispoff=%14.5e\n",
methodName, radii.size(), totalMaxWcaDisperionEnergy, epso, epsh, rmino, rminh, awater, shctd, dispoff );
for (unsigned int ii = 0; ii < gpu->natoms; ii++)
{
methodName, static_cast<unsigned int>(radii.size()), totalMaxWcaDispersionEnergy, epso, epsh, rmino, rminh, awater, shctd, dispoff );
#ifdef PARAMETER_PRINT
for( unsigned int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5u %15.7e %15.7e\n", ii, radii[ii], epsilons[ii] );
if( ii == maxPrint && ii < (paddedNumberOfAtoms - maxPrint) )
{
if( ii == maxPrint && ii < (paddedNumberOfAtoms - maxPrint) ){
(void) fprintf( amoebaGpu->log, "\n" );
ii = (paddedNumberOfAtoms - maxPrint);
if( ii < maxPrint )ii = maxPrint;
}
}
(void) fprintf( amoebaGpu->log, "\n" );
#endif
(void) fflush( amoebaGpu->log );
}
#endif
}
......@@ -2770,37 +3006,39 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
}
extern "C"
void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu, int updateFlag )
{
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "In amoebaGpuSetConstants\n" );
(void) fflush( amoebaGpu->log );
}
if( updateFlag == 0 ){
if( amoebaGpu->amoebaSim.dielec > 0.0f && amoebaGpu->amoebaSim.dwater > 0.0f ){
amoebaGpu->gpuContext->sim.preFactor = -amoebaGpu->amoebaSim.electric*((1.0f/amoebaGpu->amoebaSim.dielec)-(1.0f/amoebaGpu->amoebaSim.dwater));
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "AmoebaGpuSetConstants %d\n", updateFlag );
(void) fflush( amoebaGpu->log );
}
if( amoebaGpu->amoebaSim.dielec > 0.0f && amoebaGpu->amoebaSim.dwater > 0.0f ){
amoebaGpu->gpuContext->sim.preFactor = -amoebaGpu->amoebaSim.electric*((1.0f/amoebaGpu->amoebaSim.dielec)-(1.0f/amoebaGpu->amoebaSim.dwater));
}
gpuSetAmoebaBondOffsets( amoebaGpu );
SetCalculateAmoebaLocalForcesSim( amoebaGpu );
SetCalculateAmoebaCudaWcaDispersionSim( amoebaGpu );
SetCalculateAmoebaKirkwoodSim( amoebaGpu );
SetCalculateAmoebaKirkwoodEDiffSim( amoebaGpu );
SetCalculateAmoebaGrycukSim( amoebaGpu );
}
gpuSetAmoebaBondOffsets( amoebaGpu );
SetCalculateAmoebaLocalForcesSim( amoebaGpu );
SetForcesSim( amoebaGpu->gpuContext );
SetCalculateAmoebaCudaUtilitiesSim( amoebaGpu );
SetCalculateAmoebaMultipoleForcesSim( amoebaGpu );
SetCalculateAmoebaCudaFixedEFieldSim( amoebaGpu );
SetCalculateAmoebaCudaVdw14_7Sim( amoebaGpu );
SetCalculateAmoebaCudaWcaDispersionSim( amoebaGpu );
SetCalculateAmoebaCudaMutualInducedFieldSim( amoebaGpu );
SetCalculateAmoebaCudaPmeMutualInducedFieldSim( amoebaGpu );
SetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpu );
SetCalculateAmoebaElectrostaticSim( amoebaGpu );
SetCalculateAmoebaPmeDirectElectrostaticSim( amoebaGpu );
SetCalculateAmoebaCudaMapTorquesSim( amoebaGpu );
SetCalculateAmoebaKirkwoodSim( amoebaGpu );
SetCalculateAmoebaCudaUtilitiesSim( amoebaGpu );
SetCalculateAmoebaKirkwoodEDiffSim( amoebaGpu );
SetCalculateAmoebaCudaFixedEAndGKFieldsSim( amoebaGpu );
SetCalculateAmoebaCudaMutualInducedAndGkFieldsSim( amoebaGpu );
SetCalculateObcGbsaForces2Sim( amoebaGpu->gpuContext );
SetCalculateAmoebaPMESim( amoebaGpu );
}
......
......@@ -124,9 +124,15 @@ extern void GetCalculateAmoebaKirkwoodEDiffSim( amoebaGpuContext amoebaGpu );
//extern void cudaComputeAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu );
extern void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu );
extern void SetCalculateAmoebaObcGbsaBornSumSim( gpuContext gpu );
extern void GetCalculateAmoebaObcGbsaBornSumSim( gpuContext gpu );
extern void cudaComputeAmoebaBornRadii( amoebaGpuContext amoebaGpu );
//extern void SetCalculateAmoebaObcGbsaBornSumSim( gpuContext gpu );
//extern void GetCalculateAmoebaObcGbsaBornSumSim( gpuContext gpu );
//extern void cudaComputeAmoebaBornRadii( amoebaGpuContext amoebaGpu );
extern void kCalculateAmoebaGrycukBornRadii( amoebaGpuContext amoebaGpu );
extern void kReduceGrycukGbsaBornSum( amoebaGpuContext gpu );
extern void SetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu );
extern void GetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu );
extern void kCalculateGrycukGbsaForces2( amoebaGpuContext amoebaGpu );
// OBC -- Part 1
//extern void SetCalculateObcGbsaForces1Sim(gpuContext gpu);
......
......@@ -288,6 +288,11 @@ void gpuSetAmoebaObcParameters( amoebaGpuContext amoebaGpu , float innerDielectr
const std::vector<float>& radius, const std::vector<float>& scale, const std::vector<float>& charge,
int includeCavityTerm, float probeRadius, float surfaceAreaFactor);
extern "C"
void gpuSetAmoebaGrycukParameters( amoebaGpuContext amoebaGpu , float innerDielectric, float solventDielectric, float dielectricOffset,
const std::vector<float>& radius, const std::vector<float>& scale, const std::vector<float>& charge,
int includeCavityTerm, float probeRadius, float surfaceAreaFactor);
extern "C"
void gpuSetAmoebaVdwParameters( amoebaGpuContext amoebaGpu,
const std::vector<int>& indexIVs,
......@@ -313,7 +318,7 @@ void gpuSetAmoebaWcaDispersionParameters( amoebaGpuContext amoebaGpu,
const float awater, const float shctd, const float dispoff );
extern "C"
void amoebaGpuSetConstants(amoebaGpuContext gpu);
void amoebaGpuSetConstants(amoebaGpuContext gpu, int updateFlag );
extern "C"
void gpuSetAmoebaBondOffsets(amoebaGpuContext gpu);
......
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
#include "cudaKernels.h"
#include "amoebaCudaKernels.h"
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
void SetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaGrycukSim: cudaMemcpyToSymbol: SetSim copy to cSim failed");
status = cudaMemcpyToSymbol(cAmoebaSim, &amoebaGpu->amoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "SetCalculateAmoebaGrycukSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed");
}
void GetCalculateAmoebaGrycukSim(amoebaGpuContext amoebaGpu)
{
cudaError_t status;
gpuContext gpu = amoebaGpu->gpuContext;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaGrycukSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed");
status = cudaMemcpyFromSymbol(&amoebaGpu->amoebaSim, cAmoebaSim, sizeof(cudaAmoebaGmxSimulation));
RTERROR(status, "GetCalculateAmoebaGrycukSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
}
struct GrycukParticle {
float x;
float y;
float z;
float radius;
float scaledRadius;
float bornSum;
};
__device__ void loadGrycukShared( struct GrycukParticle* sA, unsigned int atomI )
{
// coordinates, radii and scaled radii
sA->x = cSim.pPosq[atomI].x;
sA->y = cSim.pPosq[atomI].y;
sA->z = cSim.pPosq[atomI].z;
sA->radius = cSim.pObcData[atomI].x;
sA->scaledRadius = cSim.pObcData[atomI].y;
}
__device__ void calculateGrycukBornRadiiPairIxn_kernel( GrycukParticle& atomI, GrycukParticle& atomJ, float* bornSum ){
/*
* radius: radius (TINKER rsolv)
* scaledRadius: radius*overlap scale factor (TINKER rsolv*shct)
*
*/
float xr,yr,zr;
float r,r2;
float sk, sk2;
float lik, uik;
float lik3, uik3;
float l2, l4, lr, l4r;
float u2, u4, ur, u4r;
float term;
// decide whether to compute the current interaction;
*bornSum = 0.0f;
if( atomI.radius <= 0.0f ){
return;
}
xr = atomJ.x - atomI.x;
yr = atomJ.y - atomI.y;
zr = atomJ.z - atomI.z;
r2 = xr*xr + yr*yr + zr*zr;
r = sqrt(r2);
sk = atomJ.scaledRadius;
sk2 = sk*sk;
if( (atomI.radius + r) < sk ){
lik = atomI.radius;
uik = sk - r;
lik3 = lik*lik*lik;
uik3 = uik*uik*uik;
*bornSum -= (1.0f/uik3 - 1.0f/lik3);
}
uik = r + sk;
if( (atomI.radius + r) < sk ){
lik = sk - r;
} else if( r < (atomI.radius + sk) ){
lik = atomI.radius;
} else {
lik = r - sk;
}
l2 = lik*lik;
l4 = l2*l2;
lr = lik*r;
l4r = l4*r;
u2 = uik*uik;
u4 = u2*u2;
ur = uik*r;
u4r = u4*r;
term = (3.0f*(r2-sk2)+6.0f*u2-8.0f*ur)/u4r - (3.0f*(r2-sk2)+6.0f*l2-8.0f*lr)/l4r;
*bornSum += term/16.0f;
}
__device__ void zeroGrycukParticleSharedField( struct GrycukParticle* sA )
{
sA->bornSum = 0.0f;
}
__global__
__launch_bounds__(384, 1)
void kReduceGrycukGbsaBornSum_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
while (pos < cSim.atoms)
{
float sum = 0.0f;
float* pSt = cSim.pBornSum + pos;
// Get summed Born data
for (int i = 0; i < cSim.nonbondOutputBuffers; i++)
{
sum += *pSt;
pSt += cSim.stride;
}
// Now calculate Born radius
float radius = cSim.pObcData[pos].x;
radius = 1.0f/(radius*radius*radius);
sum = radius - sum;
sum = sum <= 0.0f ? 1000.0f : pow( sum, -1.0f/3.0f );
cSim.pBornRadii[pos] = sum;
pos += gridDim.x * blockDim.x;
}
}
/**---------------------------------------------------------------------------------------
Reduce Born radii
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void kReduceGrycukGbsaBornSum( amoebaGpuContext amoebaGpu )
{
kReduceGrycukGbsaBornSum_kernel<<<amoebaGpu->gpuContext->sim.blocks, 384>>>();
LAUNCHERROR("kReduceGrycukGbsaBornSum");
if( 1 ){
static int callId = 0;
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
fileId.push_back( callId++ );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloatArray( gpu->natoms, 1, gpu->psBornRadii, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaWriteVectorOfDoubleVectorsToFile( "BornRGry", fileId, outputVector );
}
}
// Include versions of the kernels for N^2 calculations.
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaGrycukBornRadii.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaGrycukBornRadii.h"
/**---------------------------------------------------------------------------------------
Compute Born radii using Grycuk algorithm
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void kCalculateAmoebaGrycukBornRadii( amoebaGpuContext amoebaGpu )
{
#ifdef AMOEBA_DEBUG
static const char* methodName = "kCalculateAmoebaGrycukBornRadii";
static int timestep = 0;
std::vector<int> fileId;
timestep++;
fileId.resize( 2 );
fileId[0] = timestep;
fileId[1] = 1;
#endif
// ---------------------------------------------------------------------------------------
gpuContext gpu = amoebaGpu->gpuContext;
// apparently debug array can take up nontrivial no. registers
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d ZZZ\n",
methodName, gpu->natoms, amoebaGpu->maxCovalentDegreeSz );
amoebaGpu->scalingDistanceCutoff );
}
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysData, 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload();
unsigned int targetAtom = 0;
gpu->psBornRadii->Download();
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "Grycuk input\n" ); (void) fflush( amoebaGpu->log );
for( int ii = 0; ii < amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( amoebaGpu->log,"Born %6d %16.9e\n", ii,
gpu->psBornRadii->_pSysData[ii] );
}
}
#endif
// on first pass, set threads/block and based on that setting the energy buffer array
static unsigned int threadsPerBlock = 0;
if( threadsPerBlock == 0 ){
unsigned int maxThreads;
if (gpu->sm_version >= SM_20)
//maxThreads = 384;
maxThreads = 512;
else if (gpu->sm_version >= SM_12)
maxThreads = 128;
else
maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(GrycukParticle), gpu->sharedMemoryPerBlock ), maxThreads);
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaGrycuk: blcks=%u tds=%u %u bPrWrp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u\n",
gpu->sim.nonbond_blocks, threadsPerBlock, maxThreads, gpu->bOutputBufferPerWarp,
sizeof(GrycukParticle), sizeof(GrycukParticle)*threadsPerBlock,
(*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
}
#endif
}
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaGrycukN2Forces%swarp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u\n",
(gpu->bOutputBufferPerWarp ? " " : " no "), gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
sizeof(GrycukParticle), sizeof(GrycukParticle)*threadsPerBlock,
(*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
}
#endif
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaGrycukBornRadiiN2ByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData);
} else {
kCalculateAmoebaGrycukBornRadiiN2_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData);
}
LAUNCHERROR("kCalculateAmoebaCudaGrycukN2Forces");
// ---------------------------------------------------------------------------------------
}
// Born radius chain rule component for Grycuk
struct GrycukChainRuleParticle {
float x;
float y;
float z;
float radius;
float scaledRadius;
float bornRadius;
float bornForce;
float force[3];
};
__device__ void loadGrycukChainRuleParticleShared( struct GrycukChainRuleParticle* sA, unsigned int atomI )
{
// coordinates, radii and scaled radii
sA->x = cSim.pPosq[atomI].x;
sA->y = cSim.pPosq[atomI].y;
sA->z = cSim.pPosq[atomI].z;
sA->radius = cSim.pObcData[atomI].x;
sA->scaledRadius = cSim.pObcData[atomI].y;
sA->bornRadius = cSim.pBornRadii[atomI];
sA->bornForce = cSim.pBornForce[atomI];
}
__device__ void zeroGrycukChainRuleParticleSharedField( struct GrycukChainRuleParticle* sA )
{
// zero force
sA->force[0] = 0.0f;
sA->force[1] = 0.0f;
sA->force[2] = 0.0f;
}
//#define AMOEBA_DEBUG
__device__ void calculateGrycukChainRulePairIxn_kernel( GrycukChainRuleParticle& atomI, GrycukChainRuleParticle& atomJ, float force[3]
#ifdef AMOEBA_DEBUG
, float4 pullDebug[5]
#endif
){
const float pi = 3.1415926535897f;
float third = 1.0f/3.0f;
float pi43 = 4.0f*third*pi;
float lik, uik;
float lik4, uik4;
float factor = -pow(pi,third)*pow(6.0f,(2.0f*third))/9.0f;
float term = pi43/(atomI.bornRadius*atomI.bornRadius*atomI.bornRadius);
term = factor/pow( term, (4.0f*third) );
float xr = atomJ.x - atomI.x;
float yr = atomJ.y - atomI.y;
float zr = atomJ.z - atomI.z;
float sk = atomJ.scaledRadius;
float sk2 = sk*sk;
float r2 = xr*xr + yr*yr + zr*zr;
float r = sqrt(r2);
float de = 0.0f;
if( (atomI.radius + r) < sk ){
float uik4;
uik = sk - r;
uik4 = uik*uik;
uik4 = uik4*uik4;
de = -4.0f*pi/uik4;
}
if( (atomI.radius + r) < sk){
lik = sk - r;
lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*pi*(sk2-4.0f*sk*r+17.0f*r2)/ (r2*lik4);
} else if( r < (atomI.radius +sk) ){
lik = atomI.radius;
lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*pi*(2.0f*atomI.radius*atomI.radius-sk2-r2)/ (r2*lik4);
} else {
lik = r - sk;
lik4 = lik*lik;
lik4 = lik4*lik4;
de += 0.25f*pi*(sk2-4.0f*sk*r+r2)/ (r2*lik4);
}
uik = r + sk;
uik4 = uik*uik;
uik4 = uik4*uik4;
de -= 0.25f*pi*(sk2+4.0f*sk*r+r2)/ (r2*uik4);
float dbr = term * de/r;
de = dbr*atomI.bornForce;
#ifdef AMOEBA_DEBUG
pullDebug[0].x = de;
pullDebug[0].y = r;
pullDebug[0].z = factor;
pullDebug[0].w = -4.0f;
pullDebug[1].x = atomI.bornForce/4.184f;
pullDebug[1].y = atomI.bornRadius;
pullDebug[1].z = atomJ.bornForce/4.184f;
pullDebug[1].w = -5.0f;
#endif
force[0] = xr*de;
force[1] = yr*de;
force[2] = zr*de;
}
// Include versions of the kernels for N^2 calculations.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaGrycukChainRule.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaGrycukChainRule.h"
/**---------------------------------------------------------------------------------------
Compute Grycuk chain rule contribution to force
@param amoebaGpu amoebaGpu context
--------------------------------------------------------------------------------------- */
void kCalculateGrycukGbsaForces2( amoebaGpuContext amoebaGpu )
{
#ifdef AMOEBA_DEBUG
static const char* methodName = "kCalculateGrycukGbsaForces2";
static int timestep = 0;
std::vector<int> fileId;
timestep++;
fileId.resize( 2 );
fileId[0] = timestep;
fileId[1] = 1;
#endif
// ---------------------------------------------------------------------------------------
gpuContext gpu = amoebaGpu->gpuContext;
// apparently debug array can take up nontrivial no. registers
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d ZZZ\n",
methodName, gpu->natoms, amoebaGpu->maxCovalentDegreeSz );
}
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* debugArray = new CUDAStream<float4>(20*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysData, 0, sizeof( float )*4*20*paddedNumberOfAtoms);
debugArray->Upload();
unsigned int targetAtom = 0;
gpu->psBornRadii->Download();
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "Grycuk input\n" ); (void) fflush( amoebaGpu->log );
for( int ii = 0; ii < amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( amoebaGpu->log,"Born %6d %16.9e\n", ii,
gpu->psBornRadii->_pSysData[ii] );
}
}
#endif
// on first pass, set threads/block and based on that setting the energy buffer array
static unsigned int threadsPerBlock = 0;
if( threadsPerBlock == 0 ){
unsigned int maxThreads;
if (gpu->sm_version >= SM_20)
//maxThreads = 384;
maxThreads = 512;
else if (gpu->sm_version >= SM_12)
maxThreads = 128;
else
maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock(amoebaGpu, sizeof(GrycukChainRuleParticle), gpu->sharedMemoryPerBlock ), maxThreads);
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaGrycuk: blcks=%u tds=%u %u bPrWrp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u\n",
gpu->sim.nonbond_blocks, threadsPerBlock, maxThreads, gpu->bOutputBufferPerWarp,
sizeof(GrycukChainRuleParticle), sizeof(GrycukChainRuleParticle)*threadsPerBlock,
(*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
}
#endif
}
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaGrycukN2Forces%swarp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u\n",
(gpu->bOutputBufferPerWarp ? " " : " no "), gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
sizeof(GrycukChainRuleParticle), sizeof(GrycukChainRuleParticle)*threadsPerBlock,
(*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
}
#endif
//kClearForces( gpu );
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaGrycukChainRuleN2ByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukChainRuleParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData
#ifdef AMOEBA_DEBUG
,debugArray->_pDevData, targetAtom
#endif
);
} else {
kCalculateAmoebaGrycukChainRuleN2_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(GrycukChainRuleParticle)*threadsPerBlock>>>( gpu->psWorkUnit->_pDevData
#ifdef AMOEBA_DEBUG
,debugArray->_pDevData, targetAtom
#endif
);
}
LAUNCHERROR("kCalculateAmoebaCudaGrycukN2Forces");
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
debugArray->Download();
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj;
(void) fprintf( amoebaGpu->log,"%5d %5d DebugGrycukChain\n", targetAtom, jj );
for( int kk = 0; kk < 7; kk++ ){
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w );
debugIndex += paddedNumberOfAtoms;
}
(void) fprintf( amoebaGpu->log,"\n" );
}
}
#endif
if( 0 ){
static int callId = 0;
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
fileId.push_back( callId++ );
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
//cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psLabFrameDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
CUDAStream<float>* temp = new CUDAStream<float>(3*gpu->sim.paddedNumberOfAtoms, 1, "Temp1");
reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 );
cudaLoadCudaFloatArray( gpu->natoms, 3, temp, outputVector, gpu->psAtomIndex->_pSysData, 1.0f/4.184f );
cudaLoadCudaFloatArray( gpu->natoms, 1, gpu->psBornForce, outputVector, gpu->psAtomIndex->_pSysData, 1.0f/4.184f );
cudaLoadCudaFloatArray( gpu->natoms, 1, gpu->psBornRadii, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaWriteVectorOfDoubleVectorsToFile( "GryF", fileId, outputVector );
delete temp;
exit(0);
}
// ---------------------------------------------------------------------------------------
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaGrycukBornRadii, _kernel)( unsigned int* workUnit ){
extern __shared__ GrycukParticle sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
while (pos < end)
{
unsigned int x;
unsigned int y;
bool bExclusionFlag;
// Extract cell coordinates
decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
GrycukParticle* psA = &sA[tbx];
unsigned int atomI = x + tgx;
GrycukParticle localParticle;
loadGrycukShared( &localParticle, atomI );
float bornSum = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// load shared data
loadGrycukShared( &(sA[threadIdx.x]), atomI );
for (unsigned int j = 0; j < GRID; j++)
{
float localBornSum;
calculateGrycukBornRadiiPairIxn_kernel( localParticle, psA[j], &localBornSum );
bornSum += ( (atomI == (y + j)) || (atomI >= cSim.atoms) || ((y+j) >= cSim.atoms) ) ? 0.0 : localBornSum;
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += bornSum;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = bornSum;
#endif
} else {
if (lasty != y) {
unsigned int atomJ = y + tgx;
loadGrycukShared( &(sA[threadIdx.x]), atomJ );
}
// zero shared fields
zeroGrycukParticleSharedField( &(sA[threadIdx.x]) );
for (unsigned int j = 0; j < GRID; j++)
{
float localBornSum;
calculateGrycukBornRadiiPairIxn_kernel( localParticle, psA[tj], &localBornSum );
bornSum += ( (atomI >= cSim.atoms) || ((y+tj) >= cSim.atoms) ) ? 0.0 : localBornSum;
calculateGrycukBornRadiiPairIxn_kernel( psA[tj], localParticle, &localBornSum );
psA[tj].bornSum += ( (atomI >= cSim.atoms) || ((y+tj) >= cSim.atoms) ) ? 0.0 : localBornSum;
tj = (tj + 1) & (GRID - 1);
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += bornSum;
offset = y + tgx + warp*cSim.stride;
cSim.pBornSum[offset] += sA[threadIdx.x].bornSum;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = bornSum;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = sA[threadIdx.x].bornSum;
#endif
lasty = y;
}
pos++;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaGrycukChainRule, _kernel)( unsigned int* workUnit
#ifdef AMOEBA_DEBUG
, float4* debugArray, unsigned int targetAtom
#endif
){
extern __shared__ GrycukChainRuleParticle sAChainRule[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int lasty = 0xFFFFFFFF;
#ifdef AMOEBA_DEBUG
float4 pullDebug[5];
#endif
while (pos < end)
{
unsigned int x;
unsigned int y;
bool bExclusionFlag;
// Extract cell coordinates
decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
GrycukChainRuleParticle* psAChainRule = &sAChainRule[tbx];
unsigned int atomI = x + tgx;
GrycukChainRuleParticle localParticle;
loadGrycukChainRuleParticleShared( &localParticle, atomI );
zeroGrycukChainRuleParticleSharedField( &localParticle );
if (x == y){
// load shared data and zero force
loadGrycukChainRuleParticleShared( &(sAChainRule[threadIdx.x]), atomI );
zeroGrycukChainRuleParticleSharedField( &(sAChainRule[threadIdx.x]));
for (unsigned int j = (tgx+1)&(GRID-1); j != tgx; j = (j+1)&(GRID-1))
{
float localForce[3];
calculateGrycukChainRulePairIxn_kernel( localParticle, psAChainRule[j], localForce
#ifdef AMOEBA_DEBUG
, pullDebug
#endif
);
if( (atomI != (y + j)) && (atomI < cSim.atoms) && ((y+j) < cSim.atoms) ){
localParticle.force[0] -= localForce[0];
localParticle.force[1] -= localForce[1];
localParticle.force[2] -= localForce[2];
psAChainRule[j].force[0] += localForce[0];
psAChainRule[j].force[1] += localForce[1];
psAChainRule[j].force[2] += localForce[2];
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+j) == targetAtom ){
unsigned int index = (atomI == targetAtom) ? (y + j) : atomI;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + j);
debugArray[index].z = -1.0f;
debugArray[index].w = -1.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) x;
debugArray[index].y = (float) y;
debugArray[index].z = (float) tgx;
debugArray[index].w = -2.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[0].x;
debugArray[index].y = pullDebug[0].y;
debugArray[index].z = pullDebug[0].z;
debugArray[index].w = pullDebug[0].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[1].x;
debugArray[index].y = pullDebug[1].y;
debugArray[index].z = pullDebug[1].z;
debugArray[index].w = pullDebug[1].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = localForce[0];
debugArray[index].y = localForce[1];
debugArray[index].z = localForce[2];
debugArray[index].w = -12.0f;
calculateGrycukChainRulePairIxn_kernel( psAChainRule[j], localParticle, localForce , pullDebug );
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[0].x;
debugArray[index].y = pullDebug[0].y;
debugArray[index].z = pullDebug[0].z;
debugArray[index].w = -13.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = localForce[0];
debugArray[index].y = localForce[1];
debugArray[index].z = localForce[2];
debugArray[index].w = -14.0f;
}
#endif
}
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
#else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4[offset];
of.x += localParticle.force[0] + sAChainRule[threadIdx.x].force[0];
of.y += localParticle.force[1] + sAChainRule[threadIdx.x].force[1];
of.z += localParticle.force[2] + sAChainRule[threadIdx.x].force[2];
cSim.pForce4[offset] = of;
} else {
if (lasty != y) {
unsigned int atomJ = y + tgx;
loadGrycukChainRuleParticleShared( &(sAChainRule[threadIdx.x]), atomJ );
}
// zero shared fields
zeroGrycukChainRuleParticleSharedField( &(sAChainRule[threadIdx.x]) );
for (unsigned int j = 0; j < GRID; j++)
{
if( (atomI < cSim.atoms) && ((y+tj) < cSim.atoms) ){
float localForce[3];
calculateGrycukChainRulePairIxn_kernel( localParticle, psAChainRule[tj], localForce
#ifdef AMOEBA_DEBUG
, pullDebug
#endif
);
localParticle.force[0] -= localForce[0];
localParticle.force[1] -= localForce[1];
localParticle.force[2] -= localForce[2];
psAChainRule[tj].force[0] += localForce[0];
psAChainRule[tj].force[1] += localForce[1];
psAChainRule[tj].force[2] += localForce[2];
#ifdef AMOEBA_DEBUG
unsigned int index = (atomI == targetAtom) ? (y + tj) : atomI;
if( atomI == targetAtom || (y+tj) == targetAtom ){
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
debugArray[index].z = -1.0f;
debugArray[index].w = -1.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = (float) x;
debugArray[index].y = (float) y;
debugArray[index].z = (float) tgx;
debugArray[index].w = -2.0f;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[0].x;
debugArray[index].y = pullDebug[0].y;
debugArray[index].z = pullDebug[0].z;
debugArray[index].w = pullDebug[0].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[1].x;
debugArray[index].y = pullDebug[1].y;
debugArray[index].z = pullDebug[1].z;
debugArray[index].w = pullDebug[1].w;
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = localForce[0];
debugArray[index].y = localForce[1];
debugArray[index].z = localForce[2];
debugArray[index].w = -10.0f;
}
#endif
calculateGrycukChainRulePairIxn_kernel( psAChainRule[tj], localParticle, localForce
#ifdef AMOEBA_DEBUG
, pullDebug
#endif
);
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+tj) == targetAtom ){
index += cSim.paddedNumberOfAtoms;
debugArray[index].x = pullDebug[0].x;
debugArray[index].y = localForce[1];
debugArray[index].z = localForce[2];
debugArray[index].w = -11.0f;
}
#endif
localParticle.force[0] += localForce[0];
localParticle.force[1] += localForce[1];
localParticle.force[2] += localForce[2];
psAChainRule[tj].force[0] -= localForce[0];
psAChainRule[tj].force[1] -= localForce[1];
psAChainRule[tj].force[2] -= localForce[2];
}
tj = (tj + 1) & (GRID - 1);
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4[offset];
of.x += localParticle.force[0];
of.y += localParticle.force[1];
of.z += localParticle.force[2];
cSim.pForce4[offset] = of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
offset = y + tgx + warp*cSim.stride;
#else
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4[offset];
of.x += sAChainRule[threadIdx.x].force[0];
of.y += sAChainRule[threadIdx.x].force[1];
of.z += sAChainRule[threadIdx.x].force[2];
cSim.pForce4[offset] = of;
lasty = y;
}
pos++;
}
}
......@@ -1666,8 +1666,8 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceToBornForcePrefactor_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn1, float* fieldIn2,
float* fieldOut )
void kReduceToObcBornForcePrefactor_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn1, float* fieldIn2,
float* fieldOut )
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -1723,8 +1723,8 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceToBornForcePrefactorAndSASA_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn1, float* fieldIn2,
float* fieldOut )
void kReduceToObcBornForcePrefactorAndSASA_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn1, float* fieldIn2,
float* fieldOut )
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -1788,17 +1788,137 @@ void kReduceToBornForcePrefactorAndSASA_kernel( unsigned int fieldComponents, un
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += energy / -6.0f;
}
/*
static void kReduceAndCombine_dBorn(amoebaGpuContext amoebaGpu )
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceToBornForcePrefactor_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn1, float* fieldIn2,
float* fieldOut )
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
// Reduce field
while (pos < fieldComponents)
{
float totalField = 0.0f;
float* pFt1 = fieldIn1 + pos;
float* pFt2 = fieldIn2 + pos;
//float bornRadius = cSim.pBornRadii[pos];
//float obcChain = cSim.pObcChain[pos];
unsigned int i = outputBuffers;
while (i >= 4)
{
totalField += pFt1[0] + pFt1[fieldComponents] + pFt1[2*fieldComponents] + pFt1[3*fieldComponents];
totalField += pFt2[0] + pFt2[fieldComponents] + pFt2[2*fieldComponents] + pFt2[3*fieldComponents];
pFt1 += fieldComponents*4;
pFt2 += fieldComponents*4;
i -= 4;
}
if (i >= 2)
{
totalField += pFt1[0] + pFt1[fieldComponents];
totalField += pFt2[0] + pFt2[fieldComponents];
pFt1 += fieldComponents*2;
pFt2 += fieldComponents*2;
i -= 2;
}
if (i > 0)
{
totalField += pFt1[0];
totalField += pFt2[0];
}
//fieldOut[pos] = totalField*bornRadius*bornRadius*obcChain;
fieldOut[pos] = totalField;
pos += gridDim.x * blockDim.x;
}
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceToBornForcePrefactorAndSASA_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn1, float* fieldIn2,
float* fieldOut )
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
float energy = 0.0f;
// Reduce field
while (pos < fieldComponents)
{
float totalForce = 0.0f;
float* pFt1 = fieldIn1 + pos;
float* pFt2 = fieldIn2 + pos;
float bornRadius = cSim.pBornRadii[pos];
//float obcChain = cSim.pObcChain[pos];
float2 obcData = cSim.pObcData[pos];
unsigned int i = outputBuffers;
while (i >= 4)
{
totalForce += pFt1[0] + pFt1[fieldComponents] + pFt1[2*fieldComponents] + pFt1[3*fieldComponents];
totalForce += pFt2[0] + pFt2[fieldComponents] + pFt2[2*fieldComponents] + pFt2[3*fieldComponents];
pFt1 += fieldComponents*4;
pFt2 += fieldComponents*4;
i -= 4;
}
if (i >= 2)
{
totalForce += pFt1[0] + pFt1[fieldComponents];
totalForce += pFt2[0] + pFt2[fieldComponents];
pFt1 += fieldComponents*2;
pFt2 += fieldComponents*2;
i -= 2;
}
if (i > 0)
{
totalForce += pFt1[0];
totalForce += pFt2[0];
}
kReduceAndCombineFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_1_1->_pDevData,
amoebaGpu->psWorkArray_1_2->_pDevData,
amoebaGpu->psBorn->_pDevData );
LAUNCHERROR("kReduce_dBorn");
} */
float r = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
float ratio6 = ( (obcData.x + cSim.dielectricOffset) / bornRadius);
ratio6 = ratio6*ratio6*ratio6;
ratio6 = ratio6*ratio6;
float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
totalForce += saTerm / bornRadius;
//totalForce *= bornRadius * bornRadius * obcChain;
fieldOut[pos] = totalForce;
energy += saTerm;
pos += gridDim.x * blockDim.x;
}
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += energy / -6.0f;
}
static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
{
......@@ -1982,7 +2102,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
// Tinker's Born1 && E-diff
kCalculateObcGbsaForces2( amoebaGpu->gpuContext );
//kCalculateObcGbsaForces2( amoebaGpu->gpuContext );
kCalculateGrycukGbsaForces2( amoebaGpu );
kCalculateAmoebaKirkwoodEDiff( amoebaGpu );
// ---------------------------------------------------------------------------------------
......
......@@ -36,6 +36,7 @@
#include "AmoebaReferenceMultipoleForce.h"
#include "AmoebaReferenceVdwForce.h"
#include "AmoebaReferenceWcaDispersionForce.h"
#include "openmm/internal/AmoebaTorsionTorsionForceImpl.h"
#include "openmm/internal/AmoebaWcaDispersionForceImpl.h"
#include "AmoebaReferenceUreyBradleyForce.h"
#include "ReferencePlatform.h"
......@@ -412,16 +413,31 @@ void ReferenceCalcAmoebaTorsionTorsionForceKernel::initialize(const System& syst
for (int ii = 0; ii < numTorsionTorsionGrids; ii++) {
const TorsionTorsionGrid grid = force.getTorsionTorsionGrid( ii );
torsionTorsionGrids[ii].resize( grid.size() );
// check if grid needs to be reordered: x-angle should be 'slow' index
TorsionTorsionGrid reorderedGrid;
int reorder = 0;
if( grid[0][0][0] != grid[0][1][0] ){
AmoebaTorsionTorsionForceImpl::reorderGrid( grid, reorderedGrid );
reorder = 1;
}
for (unsigned int kk = 0; kk < grid.size(); kk++) {
torsionTorsionGrids[ii][kk].resize( grid[kk].size() );
for (unsigned int jj = 0; jj < grid[kk].size(); jj++) {
torsionTorsionGrids[ii][kk][jj].resize( grid[kk][jj].size() );
for (unsigned int ll = 0; ll < grid[ll][jj].size(); ll++) {
torsionTorsionGrids[ii][kk][jj][ll] = static_cast<RealOpenMM>(grid[kk][jj][ll]);
if( reorder ){
for (unsigned int ll = 0; ll < grid[ll][jj].size(); ll++) {
torsionTorsionGrids[ii][kk][jj][ll] = static_cast<RealOpenMM>(reorderedGrid[kk][jj][ll]);
}
} else {
for (unsigned int ll = 0; ll < grid[ll][jj].size(); ll++) {
torsionTorsionGrids[ii][kk][jj][ll] = static_cast<RealOpenMM>(grid[kk][jj][ll]);
}
}
}
}
......
......@@ -27,8 +27,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/amoebaKernels.h"
#include "openmm/System.h"
#include "openmm/amoebaKernels.h"
#include "SimTKUtilities/SimTKOpenMMRealType.h"
namespace OpenMM {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment