Commit 116a99ba authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Mods

parent ca43baf0
...@@ -286,11 +286,11 @@ int BrookBonded::isParameterStreamSet( int index ) const { ...@@ -286,11 +286,11 @@ int BrookBonded::isParameterStreamSet( int index ) const {
* *
*/ */
int BrookBonded::validateInverseMapStreamCount( int index, int count ) const { int BrookBonded::_validateInverseMapStreamCount( int index, int count ) const {
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::validateInverseMapStreamCount"; static const std::string methodName = "BrookBonded::_validateInverseMapStreamCount";
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -450,11 +450,11 @@ BrookFloatStreamInternal** BrookBonded::getInverseStreamMapsStreams( int index ) ...@@ -450,11 +450,11 @@ BrookFloatStreamInternal** BrookBonded::getInverseStreamMapsStreams( int index )
/*Flips i,j,k,l to l,k,j,i while correctly shuffling the params */ /*Flips i,j,k,l to l,k,j,i while correctly shuffling the params */
void BrookBonded::flipQuartet( int ibonded, int *particles ){ void BrookBonded::_flipQuartet( int ibonded, int *particles ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
// static const std::string methodName = "BrookBonded::flipQuartet"; // static const std::string methodName = "BrookBonded::_flipQuartet";
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -473,11 +473,11 @@ void BrookBonded::flipQuartet( int ibonded, int *particles ){ ...@@ -473,11 +473,11 @@ void BrookBonded::flipQuartet( int ibonded, int *particles ){
} }
int BrookBonded::matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles ){ int BrookBonded::_matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
// static const std::string methodName = "BrookBonded::matchTorsion"; // static const std::string methodName = "BrookBonded::_matchTorsion";
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -516,13 +516,13 @@ int BrookBonded::matchTorsion( int i, int j, int k, int l, int nbondeds, int *pa ...@@ -516,13 +516,13 @@ int BrookBonded::matchTorsion( int i, int j, int k, int l, int nbondeds, int *pa
* *
**/ **/
int BrookBonded::matchAngle( int i, int j, int k, int nbondeds, int BrookBonded::_matchAngle( int i, int j, int k, int nbondeds,
int *particles, int *flag ){ int *particles, int *flag ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
int n; int n;
static const std::string methodName = "BrookBonded::matchAngle"; static const std::string methodName = "BrookBonded::_matchAngle";
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -572,11 +572,11 @@ int BrookBonded::matchAngle( int i, int j, int k, int nbondeds, ...@@ -572,11 +572,11 @@ int BrookBonded::matchAngle( int i, int j, int k, int nbondeds,
* *
* */ * */
int BrookBonded::matchBond( int i, int j, int nbondeds, int *particles, int *flag ){ int BrookBonded::_matchBond( int i, int j, int nbondeds, int *particles, int *flag ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::matchBond"; static const std::string methodName = "BrookBonded::_matchBond";
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -638,11 +638,11 @@ int BrookBonded::matchBond( int i, int j, int nbondeds, int *particles, int *fla ...@@ -638,11 +638,11 @@ int BrookBonded::matchBond( int i, int j, int nbondeds, int *particles, int *fla
return ErrorReturnValue; return ErrorReturnValue;
} }
int BrookBonded::matchPair( int i, int j, int nbondeds, int *particles ){ int BrookBonded::_matchPair( int i, int j, int nbondeds, int *particles ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
//static const std::string methodName = "BrookBonded::matchPair"; //static const std::string methodName = "BrookBonded::_matchPair";
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -699,13 +699,13 @@ int BrookBonded::matchPair( int i, int j, int nbondeds, int *particles ){ ...@@ -699,13 +699,13 @@ int BrookBonded::matchPair( int i, int j, int nbondeds, int *particles ){
* *
*/ */
int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[], int BrookBonded::_addRBTorsions( int *nbondeds, int *particles, float *params[],
const vector<vector<int> >& rbTorsionIndices, const vector<vector<int> >& rbTorsionIndices,
const vector<vector<double> >& rbTorsionParameters ){ const vector<vector<double> >& rbTorsionParameters ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::BrookBonded"; static const std::string methodName = "BrookBonded::_addRBTorsions";
static const int debug = 0; static const int debug = 0;
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -725,7 +725,7 @@ int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[], ...@@ -725,7 +725,7 @@ int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[],
int k = particlesIndices[index++]; int k = particlesIndices[index++];
int l = particlesIndices[index++]; int l = particlesIndices[index++];
int ibonded = matchTorsion( i, j, k, l, *nbondeds, particles ); int ibonded = _matchTorsion( i, j, k, l, *nbondeds, particles );
if( ibonded < 0 ){ if( ibonded < 0 ){
ibonded = *nbondeds; ibonded = *nbondeds;
...@@ -767,13 +767,13 @@ int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[], ...@@ -767,13 +767,13 @@ int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[],
* *
*/ */
int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], int BrookBonded::_addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[],
const vector<vector<int> >& periodicTorsionIndices, const vector<vector<int> >& periodicTorsionIndices,
const vector<vector<double> >& periodicTorsionParameters ){ const vector<vector<double> >& periodicTorsionParameters ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addPTorsion"; static const std::string methodName = "BrookBonded::_addPTorsion";
static const int debug = 0; static const int debug = 0;
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -793,7 +793,7 @@ int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* ...@@ -793,7 +793,7 @@ int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat*
int k = particlesIndices[index++]; int k = particlesIndices[index++];
int l = particlesIndices[index++]; int l = particlesIndices[index++];
int ibonded = matchTorsion( i, j, k, l, *nbondeds, particles ); int ibonded = _matchTorsion( i, j, k, l, *nbondeds, particles );
if( ibonded < 0 ){ if( ibonded < 0 ){
ibonded = *nbondeds; ibonded = *nbondeds;
...@@ -833,12 +833,12 @@ int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* ...@@ -833,12 +833,12 @@ int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat*
* *
*/ */
int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], const std::vector<std::vector<int> >& angleIndices, int BrookBonded::_addAngles( int *nbondeds, int *particles, float *params[], const std::vector<std::vector<int> >& angleIndices,
const std::vector<std::vector<double> >& angleParameters ){ const std::vector<std::vector<double> >& angleParameters ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addAngles"; static const std::string methodName = "BrookBonded::_addAngles";
static const int debug = 0; static const int debug = 0;
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -860,7 +860,7 @@ int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], cons ...@@ -860,7 +860,7 @@ int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], cons
int k = particlesIndices[index++]; int k = particlesIndices[index++];
int flag; int flag;
int ibonded = matchAngle( i, j, k, *nbondeds, particles, &flag ); int ibonded = _matchAngle( i, j, k, *nbondeds, particles, &flag );
if( ibonded < 0 ){ if( ibonded < 0 ){
ibonded = *nbondeds; ibonded = *nbondeds;
ATOMS( ibonded, 0 ) = i; ATOMS( ibonded, 0 ) = i;
...@@ -895,12 +895,12 @@ int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], cons ...@@ -895,12 +895,12 @@ int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], cons
* *
*/ */
int BrookBonded::addBonds( int *nbondeds, int *particles, float *params[], const vector<vector<int> >& bondIndices, int BrookBonded::_addBonds( int *nbondeds, int *particles, float *params[], const vector<vector<int> >& bondIndices,
const vector<vector<double> >& bondParameters ){ const vector<vector<double> >& bondParameters ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addBonds"; static const std::string methodName = "BrookBonded::_addBonds";
static const int debug = 0; static const int debug = 0;
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -929,7 +929,7 @@ int BrookBonded::addBonds( int *nbondeds, int *particles, float *params[], const ...@@ -929,7 +929,7 @@ int BrookBonded::addBonds( int *nbondeds, int *particles, float *params[], const
} }
int flag; int flag;
int ibonded = matchBond( i, j, *nbondeds, particles, &flag ); int ibonded = _matchBond( i, j, *nbondeds, particles, &flag );
int saveIbond = ibonded; int saveIbond = ibonded;
if( ibonded < 0 ){ if( ibonded < 0 ){
ibonded = *nbondeds; ibonded = *nbondeds;
...@@ -972,14 +972,14 @@ int saveIbond = ibonded; ...@@ -972,14 +972,14 @@ int saveIbond = ibonded;
* *
*/ */
int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[], int BrookBonded::_addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[],
BrookOpenMMFloat* charges, BrookOpenMMFloat* charges,
const std::vector<std::vector<int> >& bonded14Indices, const std::vector<std::vector<int> >& bonded14Indices,
const std::vector<std::vector<double> >& nonbondedParameters ){ const std::vector<std::vector<double> >& nonbondedParameters ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addPairs"; static const std::string methodName = "BrookBonded::_addPairs";
static const double oneSixth = 1.0/6.0; static const double oneSixth = 1.0/6.0;
static const int debug = 1; static const int debug = 1;
...@@ -994,7 +994,7 @@ int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* para ...@@ -994,7 +994,7 @@ int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* para
int i = bonded14Indices[ii][0]; int i = bonded14Indices[ii][0];
int j = bonded14Indices[ii][1]; int j = bonded14Indices[ii][1];
int ibonded = matchPair( i, j, *nbondeds, particles ); int ibonded = _matchPair( i, j, *nbondeds, particles );
if( ibonded < 0 ){ if( ibonded < 0 ){
ibonded = *nbondeds; ibonded = *nbondeds;
ATOMS(ibonded, 0) = i; ATOMS(ibonded, 0) = i;
...@@ -1048,11 +1048,11 @@ int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* para ...@@ -1048,11 +1048,11 @@ int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* para
* *
*/ */
int BrookBonded::loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize ){ int BrookBonded::_loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::loadInvMaps"; static const std::string methodName = "BrookBonded::_loadInvMaps";
static const int PrintOn = 0; static const int PrintOn = 0;
double dangleValue = 0.0; double dangleValue = 0.0;
...@@ -1113,9 +1113,9 @@ int BrookBonded::loadInvMaps( int nbondeds, int nparticles, int *particles, int ...@@ -1113,9 +1113,9 @@ int BrookBonded::loadInvMaps( int nbondeds, int nparticles, int *particles, int
for( int jj = 0; jj < 4*getMaxInverseMapStreamCount()*particleStreamSize; jj++ ){ for( int jj = 0; jj < 4*getMaxInverseMapStreamCount()*particleStreamSize; jj++ ){
block[jj] = -1.0f; block[jj] = -1.0f;
} }
gpuCalcInvMap( ii, 4, nbondeds, nparticles, particles, getInverseMapStreamCount( ii ), counts, invmaps, &(_inverseMapStreamCount[ii]) ); _gpuCalcInvMap( ii, 4, nbondeds, nparticles, particles, getInverseMapStreamCount( ii ), counts, invmaps, &(_inverseMapStreamCount[ii]) );
//gpuPrintInvMaps( _inverseMapStreamCount[ii], nparticles, counts, invmaps, getLog() ); //gpuPrintInvMaps( _inverseMapStreamCount[ii], nparticles, counts, invmaps, getLog() );
validateInverseMapStreamCount( ii, _inverseMapStreamCount[ii] ); _validateInverseMapStreamCount( ii, _inverseMapStreamCount[ii] );
for( int jj = 0; jj < _inverseMapStreamCount[ii]; jj++ ){ for( int jj = 0; jj < _inverseMapStreamCount[ii]; jj++ ){
_inverseStreamMaps[ii][jj]->loadFromArray( invmaps[jj] ); _inverseStreamMaps[ii][jj]->loadFromArray( invmaps[jj] );
...@@ -1263,19 +1263,19 @@ int BrookBonded::setup( int numberOfParticles, ...@@ -1263,19 +1263,19 @@ int BrookBonded::setup( int numberOfParticles,
int nbondeds = 0; int nbondeds = 0;
if( rbTorsionBrookBondParameters ){ if( rbTorsionBrookBondParameters ){
addRBTorsions( &nbondeds, particles, params, rbTorsionBrookBondParameters->getParticleIndices(), rbTorsionBrookBondParameters->getBondParameters() ); _addRBTorsions( &nbondeds, particles, params, rbTorsionBrookBondParameters->getParticleIndices(), rbTorsionBrookBondParameters->getBondParameters() );
} }
if( periodicTorsionBrookBondParameters ){ if( periodicTorsionBrookBondParameters ){
addPTorsions( &nbondeds, particles, params, periodicTorsionBrookBondParameters->getParticleIndices(), periodicTorsionBrookBondParameters->getBondParameters() ); _addPTorsions( &nbondeds, particles, params, periodicTorsionBrookBondParameters->getParticleIndices(), periodicTorsionBrookBondParameters->getBondParameters() );
} }
if( harmonicAngleBrookBondParameters ){ if( harmonicAngleBrookBondParameters ){
addAngles( &nbondeds, particles, params, harmonicAngleBrookBondParameters->getParticleIndices(), harmonicAngleBrookBondParameters->getBondParameters() ); _addAngles( &nbondeds, particles, params, harmonicAngleBrookBondParameters->getParticleIndices(), harmonicAngleBrookBondParameters->getBondParameters() );
} }
if( harmonicBondBrookBondParameters ){ if( harmonicBondBrookBondParameters ){
addBonds( &nbondeds, particles, params, harmonicBondBrookBondParameters->getParticleIndices(), harmonicBondBrookBondParameters->getBondParameters() ); _addBonds( &nbondeds, particles, params, harmonicBondBrookBondParameters->getParticleIndices(), harmonicBondBrookBondParameters->getBondParameters() );
} }
if( nonBonded14ForceParameters ){ if( nonBonded14ForceParameters ){
addPairs( &nbondeds, particles, params, charges, nonBonded14ForceParameters->getParticleIndices(), nonBonded14ForceParameters->getBondParameters() ); _addPairs( &nbondeds, particles, params, charges, nonBonded14ForceParameters->getParticleIndices(), nonBonded14ForceParameters->getBondParameters() );
} }
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -1387,7 +1387,7 @@ int BrookBonded::setup( int numberOfParticles, ...@@ -1387,7 +1387,7 @@ int BrookBonded::setup( int numberOfParticles,
// load inverse maps to streams // load inverse maps to streams
loadInvMaps( nbondeds, getNumberOfParticles(), particles, particleStreamWidth, particleStreamSize ); _loadInvMaps( nbondeds, getNumberOfParticles(), particles, particleStreamWidth, particleStreamSize );
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -1531,9 +1531,9 @@ std::string BrookBonded::getContentsString( int level ) const { ...@@ -1531,9 +1531,9 @@ std::string BrookBonded::getContentsString( int level ) const {
* *
**/ **/
int BrookBonded::gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles, int BrookBonded::_gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles,
int *particles, int nmaps, int counts[], float4 *invmaps[], int *particles, int nmaps, int counts[], float4 *invmaps[],
int *nimaps ){ int *nimaps ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -1541,7 +1541,7 @@ int BrookBonded::gpuCalcInvMap( int posflag, int niparticles, int nints, int npa ...@@ -1541,7 +1541,7 @@ int BrookBonded::gpuCalcInvMap( int posflag, int niparticles, int nints, int npa
int particle; int particle;
int mapnum, mapcomp; int mapnum, mapcomp;
static const std::string methodName = "BrookBonded::gpuCalcInvMap"; static const std::string methodName = "BrookBonded::_gpuCalcInvMap";
static const unsigned int MAX_LINE_CHARS = 256; static const unsigned int MAX_LINE_CHARS = 256;
//char value[MAX_LINE_CHARS]; //char value[MAX_LINE_CHARS];
...@@ -1654,7 +1654,7 @@ if( PrintOn && getLog() ){ ...@@ -1654,7 +1654,7 @@ if( PrintOn && getLog() ){
} }
void BrookBonded::gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile ){ void BrookBonded::_gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile ){
int i; int i;
int j; int j;
for( i = 0; i < nparticles; i++ ){ for( i = 0; i < nparticles; i++ ){
...@@ -1676,7 +1676,7 @@ void BrookBonded::gpuPrintInvMaps( int nmaps, int nparticles, int counts[], floa ...@@ -1676,7 +1676,7 @@ void BrookBonded::gpuPrintInvMaps( int nmaps, int nparticles, int counts[], floa
* which should always be true * which should always be true
* */ * */
int BrookBonded::gpuCalcInvMap_merged( int BrookBonded::_gpuCalcInvMap_merged(
int nints, //number of interactions int nints, //number of interactions
int nparticles, //number of particles int nparticles, //number of particles
int *particles, //ijkl,ijkl,ijkl... int *particles, //ijkl,ijkl,ijkl...
...@@ -1754,8 +1754,8 @@ int BrookBonded::gpuCalcInvMap_merged( ...@@ -1754,8 +1754,8 @@ int BrookBonded::gpuCalcInvMap_merged(
* *
* buf should be nimaps * nparticles large. * buf should be nimaps * nparticles large.
* */ * */
int BrookBonded::gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts, int BrookBonded::_gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts,
float4 *invmaps[], float4 *buf ){ float4 *invmaps[], float4 *buf ){
int i, j; int i, j;
int nmaps_i; int nmaps_i;
......
...@@ -35,7 +35,6 @@ ...@@ -35,7 +35,6 @@
#include <vector> #include <vector>
#include "BrookStreamImpl.h" #include "BrookStreamImpl.h"
//#include "BrookPlatform.h"
#include "BrookCommon.h" #include "BrookCommon.h"
#include "OpenMMContext.h" #include "OpenMMContext.h"
#include "BrookBondParameters.h" #include "BrookBondParameters.h"
...@@ -43,7 +42,7 @@ ...@@ -43,7 +42,7 @@
namespace OpenMM { namespace OpenMM {
/** /**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system. * Calculate Brook bonded forces
*/ */
class BrookBonded : public BrookCommon { class BrookBonded : public BrookCommon {
...@@ -68,33 +67,19 @@ class BrookBonded : public BrookCommon { ...@@ -68,33 +67,19 @@ class BrookBonded : public BrookCommon {
/** /**
* Initialize the kernel, setting up the values of all the force field parameters. * Initialize the kernel, setting up the values of all the force field parameters.
* *
* @param bondIndices the two particles connected by each bond term * @param numberOfParticles numberOfParticles
* @param bondParameters the force parameters (length, k) for each bond term * @param harmonicBondBrookBondParameters force parameters (length, k) for each bond term
* @param angleIndices the three particles connected by each angle term * @param harmonicAngleBrookBondParameters force parameters (angle, k) for each angle term
* @param angleParameters the force parameters (angle, k) for each angle term * @param periodicTorsionBrookBondParameters force parameters (k, phase, periodicity) for each periodic torsion term
* @param periodicTorsionIndices the four particles connected by each periodic torsion term * @param rbTorsionBrookBondParameters coefficients (in order of increasing powers) for each Ryckaert-Bellemans torsion term
* @param periodicTorsionParameters the force parameters (k, phase, periodicity) for each periodic torsion term * @param nonBonded14ForceParameters nonbonded 14 force parameters (charge, sigma, epsilon) for each particle
* @param rbTorsionIndices the four particles connected by each Ryckaert-Bellemans torsion term * @param particleStreamWidth stream width
* @param rbTorsionParameters the coefficients (in order of increasing powers) for each Ryckaert-Bellemans torsion term * @param particleStreamSize stream size
* @param bonded14Indices each element contains the indices of two particles whose nonbonded interactions should be reduced since
* they form a bonded 1-4 pair
* @param nonbondedParameters the nonbonded force parameters (charge, sigma, epsilon) for each particle
* @param log log reference
* *
* @return nonzero value if error * @return nonzero value if error
* *
*/ */
/*
int setup( int numberOfParticles,
const std::vector<std::vector<int> >& bondIndices, const std::vector<std::vector<double> >& bondParameters,
const std::vector<std::vector<int> >& angleIndices, const std::vector<std::vector<double> >& angleParameters,
const std::vector<std::vector<int> >& periodicTorsionIndices, const std::vector<std::vector<double> >& periodicTorsionParameters,
const std::vector<std::vector<int> >& rbTorsionIndices, const std::vector<std::vector<double> >& rbTorsionParameters,
const std::vector<std::vector<int> >& bonded14Indices, const std::vector<std::vector<double> >& nonbondedParameters,
const Platform& platform );
*/
int setup( int numberOfParticles, int setup( int numberOfParticles,
BrookBondParameters* harmonicBondBrookBondParameters, BrookBondParameters* harmonicBondBrookBondParameters,
BrookBondParameters* harmonicAngleBrookBondParameters, BrookBondParameters* harmonicAngleBrookBondParameters,
...@@ -316,11 +301,12 @@ class BrookBonded : public BrookCommon { ...@@ -316,11 +301,12 @@ class BrookBonded : public BrookCommon {
// helper methods in setup of parameters // helper methods in setup of parameters
void flipQuartet( int ibonded, int *particles ); void _flipQuartet( int ibonded, int *particles );
int matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles );
int matchAngle( int i, int j, int k, int nbondeds, int *particles, int *flag ); int _matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles );
int matchBond( int i, int j, int nbondeds, int *particles, int *flag ); int _matchAngle( int i, int j, int k, int nbondeds, int *particles, int *flag );
int matchPair( int i, int j, int nbondeds, int *particles ); int _matchBond( int i, int j, int nbondeds, int *particles, int *flag );
int _matchPair( int i, int j, int nbondeds, int *particles );
/** /**
* Setup Ryckaert-Bellemans parameters/particle indices * Setup Ryckaert-Bellemans parameters/particle indices
...@@ -335,7 +321,7 @@ class BrookBonded : public BrookCommon { ...@@ -335,7 +321,7 @@ class BrookBonded : public BrookCommon {
* *
*/ */
int addRBTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& rbTorsionIndices, int _addRBTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& rbTorsionIndices,
const std::vector<std::vector<double> >& rbTorsionParameters ); const std::vector<std::vector<double> >& rbTorsionParameters );
/** /**
...@@ -351,7 +337,7 @@ class BrookBonded : public BrookCommon { ...@@ -351,7 +337,7 @@ class BrookBonded : public BrookCommon {
* *
*/ */
int addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& periodicTorsionIndices, int _addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& periodicTorsionIndices,
const std::vector<std::vector<double> >& periodicTorsionParameters ); const std::vector<std::vector<double> >& periodicTorsionParameters );
/** /**
...@@ -367,8 +353,8 @@ class BrookBonded : public BrookCommon { ...@@ -367,8 +353,8 @@ class BrookBonded : public BrookCommon {
* *
*/ */
int addAngles( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& angleIndices, int _addAngles( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& angleIndices,
const std::vector<std::vector<double> >& angleParameters ); const std::vector<std::vector<double> >& angleParameters );
/** /**
* Setup harmonic bond parameters/particle indices * Setup harmonic bond parameters/particle indices
...@@ -383,8 +369,8 @@ class BrookBonded : public BrookCommon { ...@@ -383,8 +369,8 @@ class BrookBonded : public BrookCommon {
* *
*/ */
int addBonds( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& bondIndices, int _addBonds( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& bondIndices,
const std::vector<std::vector<double> >& bondParameters ); const std::vector<std::vector<double> >& bondParameters );
/** /**
* Setup LJ/Coulomb 1-4 parameters/particle indices * Setup LJ/Coulomb 1-4 parameters/particle indices
...@@ -401,8 +387,8 @@ class BrookBonded : public BrookCommon { ...@@ -401,8 +387,8 @@ class BrookBonded : public BrookCommon {
* *
*/ */
int addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[], BrookOpenMMFloat* charges, int _addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[], BrookOpenMMFloat* charges,
const std::vector<std::vector<int> >& bonded14Indices, const std::vector<std::vector<double> >& nonbondedParameters ); const std::vector<std::vector<int> >& bonded14Indices, const std::vector<std::vector<double> >& nonbondedParameters );
/** /**
* Create and load inverse maps for bonded ixns * Create and load inverse maps for bonded ixns
...@@ -418,7 +404,7 @@ class BrookBonded : public BrookCommon { ...@@ -418,7 +404,7 @@ class BrookBonded : public BrookCommon {
*/ */
//int loadInvMaps( int nbondeds, int nparticles, int *particles, const BrookPlatform& platform ); //int loadInvMaps( int nbondeds, int nparticles, int *particles, const BrookPlatform& platform );
int loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize ); int _loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize );
/** /**
* Validate inverse map count * Validate inverse map count
...@@ -432,7 +418,7 @@ class BrookBonded : public BrookCommon { ...@@ -432,7 +418,7 @@ class BrookBonded : public BrookCommon {
* *
*/ */
int validateInverseMapStreamCount( int index, int count ) const; int _validateInverseMapStreamCount( int index, int count ) const;
/* /*
* Helper functions for building inverse maps for * Helper functions for building inverse maps for
...@@ -462,28 +448,31 @@ class BrookBonded : public BrookCommon { ...@@ -462,28 +448,31 @@ class BrookBonded : public BrookCommon {
* *
**/ **/
int gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles, int _gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles,
int *particles, int nmaps, int counts[], float4 *invmaps[], int *particles, int nmaps, int counts[], float4 *invmaps[], int *nimaps );
int *nimaps );
void gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile ); void _gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile );
/* We are still plagued by kernel call overheads. This is for a big fat /*
* We are still plagued by kernel call overheads. This is for a big fat
* merged inverse gather kernel: * merged inverse gather kernel:
* Since we have 32 bit floats, we have 23 bits of mantissa or the largest * Since we have 32 bit floats, we have 23 bits of mantissa or the largest
* integer we can represent is 2^23. So it should be quite safe to add * integer we can represent is 2^23. So it should be quite safe to add
* 100000 * n to the index where n is the stream in which we should do the * 100000 * n to the index where n is the stream in which we should do the
* lookup. This assumes that nints < 100000, preferably nints << 100000 * lookup. This assumes that nints < 100000, preferably nints << 100000
* which should always be true * which should always be true
* */ *
int gpuCalcInvMap_merged( int nints, int nparticles, int *particles, int nmaps, int counts[], float4 *invmaps[], int *nimaps ); **/
int _gpuCalcInvMap_merged( int nints, int nparticles, int *particles, int nmaps, int counts[], float4 *invmaps[], int *nimaps );
/* Repacks the invmap streams for more efficient access in the /*
* Repacks the invmap streams for more efficient access in the
* merged inverse gather kernel * merged inverse gather kernel
* *
* buf should be nimaps * nparticles large. * buf should be nimaps * nparticles large.
* */ *
int gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts, float4 *invmaps[], float4 *buf ); **/
int _gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts, float4 *invmaps[], float4 *buf );
}; };
......
...@@ -141,20 +141,19 @@ void BrookCalcGBSAOBCForceKernel::initialize( const System& system, const GBSAOB ...@@ -141,20 +141,19 @@ void BrookCalcGBSAOBCForceKernel::initialize( const System& system, const GBSAOB
// and initialize brookGbsa // and initialize brookGbsa
_numberOfParticles = system.getNumParticles(); _numberOfParticles = system.getNumParticles();
std::vector<std::vector<double> > particleParameters; std::vector<std::vector<double> > particleParameters( _numberOfParticles );
for( int ii = 0; ii < _numberOfParticles; ii++ ){ for( int ii = 0; ii < _numberOfParticles; ii++ ){
double charge, radius, scalingFactor; double charge, radius, scalingFactor;
force.getParticleParameters( ii, charge, radius, scalingFactor ); force.getParticleParameters( ii, charge, radius, scalingFactor );
std::vector<double> parameters; particleParameters[ii].push_back( charge );
particleParameters[ii] = parameters; particleParameters[ii].push_back( radius );
particleParameters[ii].push_back( scalingFactor );
parameters[0] = charge;
parameters[1] = radius;
parameters[2] = scalingFactor;
} }
brookGbsa.setup( particleParameters, force.getSolventDielectric(), force.getSoluteDielectric(), getPlatform() ); brookGbsa.setup( particleParameters, force.getSolventDielectric(), force.getSoluteDielectric(), getPlatform() );
brookGbsa.setIsActive( 1 );
_openMMBrookInterface.setTriggerForceKernel( this ); _openMMBrookInterface.setTriggerForceKernel( this );
_openMMBrookInterface.setTriggerEnergyKernel( this ); _openMMBrookInterface.setTriggerEnergyKernel( this );
......
...@@ -107,12 +107,14 @@ void BrookCalcKineticEnergyKernel::initialize( const System& system ){ ...@@ -107,12 +107,14 @@ void BrookCalcKineticEnergyKernel::initialize( const System& system ){
_masses[ii] = static_cast<BrookOpenMMFloat>(system.getParticleMass(ii)); _masses[ii] = static_cast<BrookOpenMMFloat>(system.getParticleMass(ii));
} }
/*
std::vector<double> masses; std::vector<double> masses;
for( unsigned int ii = 0; ii < (unsigned int) _numberOfParticles; ii++ ){ for( unsigned int ii = 0; ii < (unsigned int) _numberOfParticles; ii++ ){
masses.push_back( system.getParticleMass(ii) ); masses.push_back( system.getParticleMass(ii) );
} }
_brookVelocityCenterOfMassRemoval = new BrookVelocityCenterOfMassRemoval(); _brookVelocityCenterOfMassRemoval = new BrookVelocityCenterOfMassRemoval();
_brookVelocityCenterOfMassRemoval->setup( masses, getPlatform() ); _brookVelocityCenterOfMassRemoval->setup( masses, getPlatform() );
*/
return; return;
} }
...@@ -130,7 +132,7 @@ double BrookCalcKineticEnergyKernel::execute( OpenMMContextImpl& context ){ ...@@ -130,7 +132,7 @@ double BrookCalcKineticEnergyKernel::execute( OpenMMContextImpl& context ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookCalcKineticEnergyKernel::execute"; static const std::string methodName = "BrookCalcKineticEnergyKernel::execute";
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -166,10 +168,11 @@ printf( "NewCom [%12.5e %12.5e %12.5e]\n", newcom[0], newcom[1], newcom[2] ); ...@@ -166,10 +168,11 @@ printf( "NewCom [%12.5e %12.5e %12.5e]\n", newcom[0], newcom[1], newcom[2] );
index = 0; index = 0;
*/ */
for( int ii = 0; ii < _numberOfParticles; ii++, index += 3 ){ for( int ii = 0; ii < _numberOfParticles; ii++, index += 3 ){
energy += _masses[ii]*(velocity[index]*velocity[index] + velocity[index + 1]*velocity[index + 1] + velocity[index + 2]*velocity[index + 2]); energy += _masses[ii]*(velocity[index]*velocity[index] + velocity[index + 1]*velocity[index + 1] + velocity[index + 2]*velocity[index + 2]);
} }
//printf( " KQ=%12.5e\n", 0.5*energy ); printf( " KQ=%12.5e\n", 0.5*energy );
return 0.5*energy; return 0.5*energy;
} }
...@@ -688,7 +688,7 @@ int BrookGbsa::setup( const std::vector<std::vector<double> >& vectorOfParticleP ...@@ -688,7 +688,7 @@ int BrookGbsa::setup( const std::vector<std::vector<double> >& vectorOfParticleP
vector<RealOpenMM> particleRadii(numberOfParticles); vector<RealOpenMM> particleRadii(numberOfParticles);
vector<RealOpenMM> scaleFactors(numberOfParticles); vector<RealOpenMM> scaleFactors(numberOfParticles);
float dielectricOffset = getDielectricOffset(); float dielectricOffset = getDielectricOffset();
// loop over particle parameters // loop over particle parameters
// track any errors and then throw exception // track any errors and then throw exception
...@@ -717,7 +717,7 @@ int BrookGbsa::setup( const std::vector<std::vector<double> >& vectorOfParticleP ...@@ -717,7 +717,7 @@ int BrookGbsa::setup( const std::vector<std::vector<double> >& vectorOfParticleP
int streamIndex = 2*vectorIndex; int streamIndex = 2*vectorIndex;
particleRadii[vectorIndex] = static_cast<RealOpenMM> (radius); particleRadii[vectorIndex] = static_cast<RealOpenMM> (radius);
scaleFactors[vectorIndex] = static_cast<RealOpenMM> (scalingFactor); scaleFactors[vectorIndex] = static_cast<RealOpenMM> (scalingFactor);
_charges[vectorIndex] = static_cast<RealOpenMM> (charge); _charges[vectorIndex] = static_cast<RealOpenMM> (charge);
...@@ -904,56 +904,6 @@ std::string BrookGbsa::getContentsString( int level ) const { ...@@ -904,56 +904,6 @@ std::string BrookGbsa::getContentsString( int level ) const {
return message.str(); return message.str();
} }
/*
* Calculate OBC energy
*
* @param particlePositions particle positions
* @return energy
*
* @throw OpenMMException if _cpuObc or charges are not set
*
* */
double BrookGbsa::getEnergy( const Stream& particlePositions ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookGbsa::getEnergy";
// ---------------------------------------------------------------------------------------
// validate initialization
if( _cpuObc == NULL ){
std::stringstream message;
message << methodName << " _cpuObc not set.";
throw OpenMMException( message.str() );
return ErrorReturnValue;
}
if( _charges == NULL ){
std::stringstream message;
message << methodName << " charges not set.";
throw OpenMMException( message.str() );
return ErrorReturnValue;
}
const BrookStreamImpl& positionStreamC = dynamic_cast<const BrookStreamImpl&> (particlePositions.getImpl());
BrookStreamImpl& positionStream = const_cast<BrookStreamImpl&> (positionStreamC);
BrookOpenMMFloat* positionsF = (BrookOpenMMFloat*) positionStream.getData();
RealOpenMM** positions = copy1DArrayTo2DArray( positionStream.getSize(), 3, positionsF );
RealOpenMM** forces = allocateRealArray( positionStream.getSize(), 3 );
_cpuObc->computeImplicitSolventForces( positions, _charges, forces, 1 );
disposeRealArray( forces );
disposeRealArray( positions );
return _cpuObc->getEnergy();
}
/** /**
* Compute forces * Compute forces
* *
...@@ -964,7 +914,7 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl& ...@@ -964,7 +914,7 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl&
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookGbsa::executeForces"; static const std::string methodName = "BrookGbsa::executeForces";
static const int PrintOn = 0; static const int PrintOn = 1;
float mergeNonObcForces = 1.0f; float mergeNonObcForces = 1.0f;
float kcalMolTokJNM = -0.4184f; float kcalMolTokJNM = -0.4184f;
...@@ -983,13 +933,36 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl& ...@@ -983,13 +933,36 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl&
(float) getParticleStreamWidth( ), (float) getParticleStreamWidth( ),
(float) getPartialForceStreamWidth( ), (float) getPartialForceStreamWidth( ),
positionStream.getBrookStream(), positionStream.getBrookStream(),
getObcParticleRadii()->getBrookStream(),
getObcScaledParticleRadii()->getBrookStream(), getObcScaledParticleRadii()->getBrookStream(),
// NOTE: obcParticleRadiiWithDielectricOffset & obcScaledParticleRadii may be wrong!! or br files need editing
// gpuObc->getBrookStreamWrapperAtIndex( GpuObc::obcParticleRadiiWithDielectricOffset )->getStream(),
// gpuObc->getBrookStreamWrapperAtIndex( GpuObc::obcScaledParticleRadii )->getStream(),
gbsaForceStreams[0]->getBrookStream() ); gbsaForceStreams[0]->getBrookStream() );
// ---------------------------------------------------------------------------------------
// diagnostics
if( 0 && PrintOn && getLog() ){
(void) fprintf( getLog(), "\n%s Post kCalculateBornRadii: atms=%d ceil=%d dup=%d particleStrW=%3d prtlF=%3d diel=%.3f %.3f ACE=%.1f\n",
methodName.c_str(), getNumberOfParticles(),
getParticleSizeCeiling(),
getDuplicationFactor(),
getParticleStreamWidth( ),
getPartialForceStreamWidth( ) );
BrookStreamInternal* brookStreamInternalF = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalF->printToFile( getLog() );
(void) fprintf( getLog(), "\nRadii\n" );
getObcParticleRadii()->printToFile( getLog() );
(void) fprintf( getLog(), "\nObcScaledParticleRadii\n" );
getObcScaledParticleRadii()->printToFile( getLog() );
}
// ---------------------------------------------------------------------------------------
kPostCalculateBornRadii_nobranch( kPostCalculateBornRadii_nobranch(
(float) getDuplicationFactor(), (float) getDuplicationFactor(),
(float) getParticleStreamWidth( ), (float) getParticleStreamWidth( ),
...@@ -1004,6 +977,37 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl& ...@@ -1004,6 +977,37 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl&
getObcBornRadii()->getBrookStream(), getObcBornRadii()->getBrookStream(),
getObcChain()->getBrookStream() ); getObcChain()->getBrookStream() );
// ---------------------------------------------------------------------------------------
// diagnostics
if( 0 && PrintOn && getLog() ){
(void) fprintf( getLog(), "\n%s Post kPostCalculateBornRadii_nobranch: atms=%d ceil=%d dup=%d particleStrW=%3d prtlF=%3d diel=%.3f %.3f ACE=%.1f\n",
methodName.c_str(), getNumberOfParticles(),
getParticleSizeCeiling(),
getDuplicationFactor(),
getParticleStreamWidth( ),
getPartialForceStreamWidth( ) );
BrookStreamInternal* brookStreamInternalF = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalF->printToFile( getLog() );
(void) fprintf( getLog(), "\nInput\n" );
gbsaForceStreams[0]->printToFile( getLog() );
(void) fprintf( getLog(), "\nObcParticleRadii\n" );
getObcParticleRadii()->printToFile( getLog() );
(void) fprintf( getLog(), "\nBornR\n" );
getObcBornRadii()->printToFile( getLog() );
(void) fprintf( getLog(), "\nObcChain\n" );
getObcChain()->printToFile( getLog() );
}
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
// seecond major loop // seecond major loop
......
...@@ -163,12 +163,12 @@ void BrookIntegrateVerletStepKernel::initialize( const System& system, const Ve ...@@ -163,12 +163,12 @@ void BrookIntegrateVerletStepKernel::initialize( const System& system, const Ve
_brookVerletDynamics->setLog( log ); _brookVerletDynamics->setLog( log );
_brookShakeAlgorithm = new BrookShakeAlgorithm( ); _brookShakeAlgorithm = new BrookShakeAlgorithm( );
_brookShakeAlgorithm->setLog( log );
_brookShakeAlgorithm->setup( masses, constraintIndicesVector, constraintLengths, getPlatform() ); _brookShakeAlgorithm->setup( masses, constraintIndicesVector, constraintLengths, getPlatform() );
BrookOpenMMFloat tolerance = static_cast<BrookOpenMMFloat>( integrator.getConstraintTolerance() ); BrookOpenMMFloat tolerance = static_cast<BrookOpenMMFloat>( integrator.getConstraintTolerance() );
_brookShakeAlgorithm->setShakeTolerance( tolerance ); _brookShakeAlgorithm->setShakeTolerance( tolerance );
_brookShakeAlgorithm->setMaxIterations( 30 ); _brookShakeAlgorithm->setMaxIterations( 30 );
_brookShakeAlgorithm->setLog( log );
if( printOn && log ){ if( printOn && log ){
(void) fprintf( log, "%s done w/ setup: particles=%d const=%d\n", methodName.c_str(), numberOfParticles, numberOfConstraints ); (void) fprintf( log, "%s done w/ setup: particles=%d const=%d\n", methodName.c_str(), numberOfParticles, numberOfConstraints );
......
...@@ -370,7 +370,6 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI ...@@ -370,7 +370,6 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const char* methodName = "\nBrookLangevinDynamics::update"; static const char* methodName = "\nBrookLangevinDynamics::update";
static const int PrintOn = 0; static const int PrintOn = 0;
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -427,8 +426,8 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI ...@@ -427,8 +426,8 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// diagnostics // diagnostics
if( PrintOn ){ if( PrintOn ){
(void) fprintf( getLog(), "\nPost kupdate_sd1_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d " (void) fprintf( getLog(), "\n%s Post kupdate_sd1_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"EM=%12.5e Sd1pc[]=[%12.5e %12.5e %12.5e]", "EM=%12.5e Sd1pc[]=[%12.5e %12.5e %12.5e]", methodName,
getLangevinDynamicsParticleStreamWidth(), getLangevinDynamicsParticleStreamWidth(),
brookRandomNumberGenerator.getRandomNumberStreamWidth(), brookRandomNumberGenerator.getRandomNumberStreamWidth(),
brookRandomNumberGenerator.getRvStreamOffset(), brookRandomNumberGenerator.getRvStreamOffset(),
...@@ -477,10 +476,10 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI ...@@ -477,10 +476,10 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// first Shake step // first Shake step
if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){ if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){
kshakeh_fix1( kshakeh_fix1(
(float) brookShakeAlgorithm.getMaxIterations(), (float) brookShakeAlgorithm.getMaxIterations(),
(float) getLangevinDynamicsParticleStreamWidth(), (float) getLangevinDynamicsParticleStreamWidth(),
brookShakeAlgorithm.getInverseHydrogenMass(),
brookShakeAlgorithm.getShakeTolerance(), brookShakeAlgorithm.getShakeTolerance(),
brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(), brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(),
positionStream.getBrookStream(), positionStream.getBrookStream(),
...@@ -503,6 +502,37 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI ...@@ -503,6 +502,37 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
brookShakeAlgorithm.getShakeXCons2Stream()->getBrookStream(), brookShakeAlgorithm.getShakeXCons2Stream()->getBrookStream(),
brookShakeAlgorithm.getShakeXCons3Stream()->getBrookStream(), brookShakeAlgorithm.getShakeXCons3Stream()->getBrookStream(),
getXPrimeStream()->getBrookStream() ); getXPrimeStream()->getBrookStream() );
if( ( 0 || PrintOn) && getLog() ){
(void) fprintf( getLog(), "\n%s Post kshakeh_update2_fix1: particleStrW=%3d",
methodName, getLangevinDynamicsParticleStreamWidth() );
(void) fprintf( getLog(), "\nShakeInverseMapStream\n" );
brookShakeAlgorithm.getShakeInverseMapStream()->printToFile( getLog() );
BrookStreamInternal* brookStreamInternalPos = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalPos->printToFile( getLog() );
(void) fprintf( getLog(), "\nXPrimeStream\n" );
getXPrimeStream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons0\n" );
brookShakeAlgorithm.getShakeXCons0Stream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons1\n" );
brookShakeAlgorithm.getShakeXCons1Stream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons2\n" );
brookShakeAlgorithm.getShakeXCons2Stream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons3\n" );
brookShakeAlgorithm.getShakeXCons3Stream()->printToFile( getLog() );
}
} }
// second integration step // second integration step
...@@ -527,8 +557,8 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI ...@@ -527,8 +557,8 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// diagnostics // diagnostics
if( PrintOn ){ if( PrintOn ){
(void) fprintf( getLog(), "\nPost kupdate_sd2_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d " (void) fprintf( getLog(), "\n%s Post kupdate_sd2_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"Sd2pc[]=[%12.5e %12.5e]", "Sd2pc[]=[%12.5e %12.5e]", methodName,
getLangevinDynamicsParticleStreamWidth(), getLangevinDynamicsParticleStreamWidth(),
brookRandomNumberGenerator.getRandomNumberStreamWidth(), brookRandomNumberGenerator.getRandomNumberStreamWidth(),
brookRandomNumberGenerator.getRvStreamOffset(), brookRandomNumberGenerator.getRvStreamOffset(),
...@@ -568,10 +598,10 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI ...@@ -568,10 +598,10 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// second Shake step // second Shake step
if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){ if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){
kshakeh_fix1( kshakeh_fix1(
(float) brookShakeAlgorithm.getMaxIterations(), (float) brookShakeAlgorithm.getMaxIterations(),
(float) getLangevinDynamicsParticleStreamWidth(), (float) getLangevinDynamicsParticleStreamWidth(),
brookShakeAlgorithm.getInverseHydrogenMass(),
brookShakeAlgorithm.getShakeTolerance(), brookShakeAlgorithm.getShakeTolerance(),
brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(), brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(),
positionStream.getBrookStream(), positionStream.getBrookStream(),
...@@ -595,6 +625,31 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI ...@@ -595,6 +625,31 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
brookShakeAlgorithm.getShakeXCons3Stream()->getBrookStream(), brookShakeAlgorithm.getShakeXCons3Stream()->getBrookStream(),
positionStream.getBrookStream() ); positionStream.getBrookStream() );
// diagnostics
if( PrintOn ){
(void) fprintf( getLog(), "\n%s Post kshakeh_update2_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"Sd2pc[]=[%12.5e %12.5e]", methodName,
getLangevinDynamicsParticleStreamWidth(),
brookRandomNumberGenerator.getRandomNumberStreamWidth(),
brookRandomNumberGenerator.getRvStreamOffset(),
derivedParameters[Sd2pc1], derivedParameters[Sd2pc2] );
BrookStreamInternal* brookStreamInternalPos = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalPos->printToFile( getLog() );
brookStreamInternalPos = velocityStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nVelocityStream\n" );
brookStreamInternalPos->printToFile( getLog() );
(void) fprintf( getLog(), "\nXPrimeStream\n" );
getXPrimeStream()->printToFile( getLog() );
// brookRandomNumberGenerator.getRandomNumberStream( brookRandomNumberGenerator.getRvStreamIndex() )->printToFile( getLog() );
}
} else { } else {
//kadd3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() ); //kadd3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() );
ksetStr3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() ); ksetStr3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() );
......
...@@ -38,6 +38,39 @@ ...@@ -38,6 +38,39 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
struct ShakeCluster {
int _centralID;
int _peripheralID[3];
int _size;
float _distance;
float _centralInvMass, _peripheralInvMass;
ShakeCluster(){};
ShakeCluster( int centralID, float invMass) : _centralID(centralID), _centralInvMass(invMass), _size(0) {};
void addAtom( int id, float dist, float invMass){
if( _size == 3 ){
std::stringstream message;
message << "ShakeCluster::addAtom: " << "atom " << id << " has more than 3 constraints!." << std::endl;
throw OpenMMException( message.str() );
}
if( _size > 0 && dist != _distance ){
std::stringstream message;
message << "ShakeCluster::addAtom: " << "atom " << id << " has different constraint distances: " << dist << " and " << _distance << std::endl;
throw OpenMMException( message.str() );
}
if( _size > 0 && invMass != _peripheralInvMass ){
std::stringstream message;
message << "ShakeCluster::addAtom: " << " constrainted atoms associated w/ atom " << id << " have different masses: " << invMass << " and " << _peripheralInvMass << std::endl;
throw OpenMMException( message.str() );
}
_peripheralID[_size++] = id;
_distance = dist;
_peripheralInvMass = invMass;
}
};
/** /**
* *
* Constructor * Constructor
...@@ -74,8 +107,6 @@ BrookShakeAlgorithm::BrookShakeAlgorithm( ){ ...@@ -74,8 +107,6 @@ BrookShakeAlgorithm::BrookShakeAlgorithm( ){
_shakeStreams[ii] = NULL; _shakeStreams[ii] = NULL;
} }
_inverseHydrogenMass = one/( (BrookOpenMMFloat) 1.008);
// setup inverse sqrt masses // setup inverse sqrt masses
_inverseSqrtMasses = NULL; _inverseSqrtMasses = NULL;
...@@ -164,17 +195,6 @@ int BrookShakeAlgorithm::setShakeTolerance( BrookOpenMMFloat tolerance ){ ...@@ -164,17 +195,6 @@ int BrookShakeAlgorithm::setShakeTolerance( BrookOpenMMFloat tolerance ){
return DefaultReturnValue; return DefaultReturnValue;
} }
/**
* Get inverse of hydrogen mass
*
* @return inverse of hydrogen mass
*
*/
BrookOpenMMFloat BrookShakeAlgorithm::getInverseHydrogenMass( void ) const {
return _inverseHydrogenMass;
}
/** /**
* Get Particle stream size * Get Particle stream size
* *
...@@ -412,6 +432,7 @@ int BrookShakeAlgorithm::_initializeStreams( const Platform& platform ){ ...@@ -412,6 +432,7 @@ int BrookShakeAlgorithm::_initializeStreams( const Platform& platform ){
* @param masses masses * @param masses masses
* @param constraintIndices constraint particle indices * @param constraintIndices constraint particle indices
* @param constraintLengths constraint lengths * @param constraintLengths constraint lengths
* @param platform platform reference
* *
* @return ErrorReturnValue if error * @return ErrorReturnValue if error
* *
...@@ -420,7 +441,7 @@ int BrookShakeAlgorithm::_initializeStreams( const Platform& platform ){ ...@@ -420,7 +441,7 @@ int BrookShakeAlgorithm::_initializeStreams( const Platform& platform ){
*/ */
int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, const std::vector< std::vector<int> >& constraintIndices, int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, const std::vector< std::vector<int> >& constraintIndices,
const std::vector<double>& constraintLengths ){ const std::vector<double>& constraintLengths, const Platform& platform ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -431,9 +452,7 @@ int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, co ...@@ -431,9 +452,7 @@ int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, co
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
int shakeParticleStreamSize = getShakeParticleStreamSize(); FILE* log = getLog();
int shakeConstraintStreamSize = getShakeConstraintStreamSize();
// check that number of constraints for two input vectors is consistent // check that number of constraints for two input vectors is consistent
if( constraintIndices.size() != constraintLengths.size() ){ if( constraintIndices.size() != constraintLengths.size() ){
...@@ -443,94 +462,127 @@ int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, co ...@@ -443,94 +462,127 @@ int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, co
return ErrorReturnValue; return ErrorReturnValue;
} }
// allocate arrays to be read down to board // get constraint count for each atom
BrookOpenMMFloat* particleIndices = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
BrookOpenMMFloat* shakeParameters = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
for( int ii = 0; ii < 4*shakeConstraintStreamSize; ii++ ){
particleIndices[ii] = (BrookOpenMMFloat) -1;
}
memset( shakeParameters, 0, 4*shakeConstraintStreamSize*sizeof( BrookOpenMMFloat ) );
vector<int> constraintCount( masses.size(), 0);
std::vector< std::vector<int> >::const_iterator particleIterator = constraintIndices.begin(); std::vector< std::vector<int> >::const_iterator particleIterator = constraintIndices.begin();
std::vector<double>::const_iterator distanceIterator = constraintLengths.begin();
int constraintIndex = 0;
_numberOfConstraints = 0;
while( particleIterator != constraintIndices.end() ){ while( particleIterator != constraintIndices.end() ){
std::vector<int> particleVector = *particleIterator; std::vector<int> particleVector = *particleIterator;
int atomI = particleVector[0];
int atomJ = particleVector[1];
// check that array of indices is not too small or large // check that atom indices are not out of range
if( particleVector.size() < 2 ){ if( atomI < 0 || atomI > (int) masses.size() ){
std::stringstream message; std::stringstream message;
message << methodName << " particleIndices size=" << particleVector.size() << " is too small at constraintIndex=" << (constraintIndex/4); message << methodName << " constraint I-index=" << atomI << " is out of range [0, " << masses.size() << "]" << std::endl;
throw OpenMMException( message.str() ); throw OpenMMException( message.str() );
return ErrorReturnValue; return ErrorReturnValue;
} }
if( particleVector.size() > 4 ){ if( atomJ < 0 || atomJ > (int) masses.size() ){
std::stringstream message; std::stringstream message;
message << methodName << " particleIndices size=" << particleVector.size() << " is too large at constraintIndex=" << (constraintIndex/4); message << methodName << " constraint J-index=" << atomJ << " is out of range [0, " << masses.size() << "]" << std::endl;
throw OpenMMException( message.str() ); throw OpenMMException( message.str() );
return ErrorReturnValue; return ErrorReturnValue;
}
int index = 0;
int particleIndex1 = -1;
int particleIndex2 = -1;
for( std::vector<int>::const_iterator ii = particleVector.begin(); ii != particleVector.end(); ii++, index++ ){
particleIndices[constraintIndex + index] = (BrookOpenMMFloat) *ii;
if( index == 0 ){
particleIndex1 = *ii;
} else if( index == 1 ){
particleIndex2 = *ii;
}
} }
// skip negative indices constraintCount[atomI]++;
constraintCount[atomJ]++;
particleIterator++;
}
// Find clusters consisting of a central atom with up to three peripheral atoms.
map<int, ShakeCluster> clusters;
particleIterator = constraintIndices.begin();
std::vector<double>::const_iterator distanceIterator = constraintLengths.begin();
while( particleIterator != constraintIndices.end() ){
if( particleIndex1 < 0 || particleIndex2 < 0 ){ float distance = static_cast<float>( *distanceIterator );
particleIterator++; std::vector<int> particleVector = *particleIterator;
distanceIterator++; int atomI = particleVector[0];
continue; int atomJ = particleVector[1];
// Determine the central atom.
bool firstIsCentral;
if( constraintCount[atomI] > 1 ){
firstIsCentral = true;
} else if (constraintCount[atomJ] > 1 ){
firstIsCentral = false;
} else if( atomI < atomJ ){
firstIsCentral = true;
} else {
firstIsCentral = false;
} }
int centralID, peripheralID;
float centralInvMass, peripheralInvMass;
if( firstIsCentral ){
centralID = atomI;
peripheralID = atomJ;
centralInvMass = static_cast<float>( masses[atomI] );
peripheralInvMass = static_cast<float>( masses[atomJ] );
} else {
centralID = atomJ;
peripheralID = atomI;
centralInvMass = static_cast<float>( masses[atomJ] );
peripheralInvMass = static_cast<float>( masses[atomI] );
}
if( constraintCount[peripheralID] != 1 ){
throw OpenMMException("Only bonds to hydrogens may be constrained");
}
// Add it to the cluster
if( clusters.find(centralID) == clusters.end() ){
clusters[centralID] = ShakeCluster( centralID, centralInvMass );
}
clusters[centralID].addAtom( peripheralID, distance, peripheralInvMass );
particleIterator++;
distanceIterator++;
}
// allocate space for streams
if( particleIndex1 >= (int) masses.size() || particleIndex2 >= (int) masses.size() ){ _initializeStreamSizes( (int) masses.size(), (int) clusters.size(), platform );
std::stringstream message; _initializeStreams( platform );
message << methodName << " constraint indices=[" << particleIndex1 << ", " << particleIndex2 << "] greater than mass array size=" << masses.size();
throw OpenMMException( message.str() ); int shakeParticleStreamSize = getShakeParticleStreamSize();
return ErrorReturnValue; int shakeConstraintStreamSize = getShakeConstraintStreamSize();
}
// insure heavy particle is first // allocate arrays to be read down to board and initialize
if( masses[particleIndex1] < masses[particleIndex2] ){ BrookOpenMMFloat* particleIndices = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
BrookOpenMMFloat* shakeParameters = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
BrookOpenMMFloat minusOne = static_cast<BrookOpenMMFloat>(-1.0);
for( int ii = 0; ii < 4*shakeConstraintStreamSize; ii++ ){
particleIndices[ii] = minusOne;
}
memset( shakeParameters, 0, 4*shakeConstraintStreamSize*sizeof( BrookOpenMMFloat ) );
BrookOpenMMFloat swap = particleIndices[constraintIndex]; // load indices & parameters
particleIndices[constraintIndex] = particleIndices[constraintIndex+1];
particleIndices[constraintIndex+1] = swap;
int swapI = particleIndex1; int constraintIndex = 0;
particleIndex1 = particleIndex2; _numberOfConstraints = static_cast<int>(clusters.size());
particleIndex2 = swapI; for( map<int, ShakeCluster>::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter ){
}
// set parameters: const ShakeCluster& cluster = iter->second;
// (1) 1/(heavy particle mass) particleIndices[constraintIndex] = static_cast<BrookOpenMMFloat>( cluster._centralID );
// (2) 0.5/(heavy particle mass+hydrogen mass) for( int ii = 0; ii < cluster._size; ii++ ){
// (3) constraint distance**2 particleIndices[constraintIndex+ii+1] = static_cast<BrookOpenMMFloat>( cluster._peripheralID[ii] );
}
shakeParameters[constraintIndex] = one/( (BrookOpenMMFloat) masses[particleIndex1] ); shakeParameters[constraintIndex] = one/( static_cast<BrookOpenMMFloat>( cluster._centralInvMass ) );
shakeParameters[constraintIndex+1] = half/( (BrookOpenMMFloat) (masses[particleIndex1] + masses[particleIndex2]) ); shakeParameters[constraintIndex+1] = half/( static_cast<BrookOpenMMFloat>(cluster._centralInvMass + cluster._peripheralInvMass) );
shakeParameters[constraintIndex+2] = (BrookOpenMMFloat) ( (*distanceIterator)*(*distanceIterator) ); shakeParameters[constraintIndex+2] = static_cast<BrookOpenMMFloat>( cluster._distance*cluster._distance );
shakeParameters[constraintIndex+3] = one/( static_cast<BrookOpenMMFloat>( cluster._peripheralInvMass ) );
particleIterator++;
distanceIterator++;
constraintIndex += 4; constraintIndex += 4;
_numberOfConstraints++;
} }
// write entries to board // write entries to board
...@@ -593,12 +645,7 @@ int BrookShakeAlgorithm::setup( const std::vector<double>& masses, const std::ve ...@@ -593,12 +645,7 @@ int BrookShakeAlgorithm::setup( const std::vector<double>& masses, const std::ve
int numberOfParticles = (int) masses.size(); int numberOfParticles = (int) masses.size();
setNumberOfParticles( numberOfParticles ); setNumberOfParticles( numberOfParticles );
// set stream sizes and then create streams _setShakeStreams( masses, constraintIndices, constraintLengths, platform );
_initializeStreamSizes( numberOfParticles, (int) constraintIndices.size(), platform );
_initializeStreams( platform );
_setShakeStreams( masses, constraintIndices, constraintLengths );
return DefaultReturnValue; return DefaultReturnValue;
} }
...@@ -634,6 +681,9 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const { ...@@ -634,6 +681,9 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const {
#define LOCAL_SPRINTF(a,b,c) sprintf( (a), (b), (c) ); #define LOCAL_SPRINTF(a,b,c) sprintf( (a), (b), (c) );
#endif #endif
(void) LOCAL_SPRINTF( value, "%d", getNumberOfConstraints() );
message << _getLine( tab, "Number of constraints:", value );
(void) LOCAL_SPRINTF( value, "%d", getNumberOfParticles() ); (void) LOCAL_SPRINTF( value, "%d", getNumberOfParticles() );
message << _getLine( tab, "Number of particles:", value ); message << _getLine( tab, "Number of particles:", value );
...@@ -652,9 +702,6 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const { ...@@ -652,9 +702,6 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const {
(void) LOCAL_SPRINTF( value, "%d", getParticleStreamSize() ); (void) LOCAL_SPRINTF( value, "%d", getParticleStreamSize() );
message << _getLine( tab, "Particle stream size:", value ); message << _getLine( tab, "Particle stream size:", value );
(void) LOCAL_SPRINTF( value, "%d", getNumberOfConstraints() );
message << _getLine( tab, "Number of constraints:", value );
(void) LOCAL_SPRINTF( value, "%d", getShakeConstraintStreamWidth() ); (void) LOCAL_SPRINTF( value, "%d", getShakeConstraintStreamWidth() );
message << _getLine( tab, "Constraint stream width:", value ); message << _getLine( tab, "Constraint stream width:", value );
...@@ -664,9 +711,6 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const { ...@@ -664,9 +711,6 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const {
(void) LOCAL_SPRINTF( value, "%d", getShakeConstraintStreamSize() ); (void) LOCAL_SPRINTF( value, "%d", getShakeConstraintStreamSize() );
message << _getLine( tab, "Constraint stream size:", value ); message << _getLine( tab, "Constraint stream size:", value );
(void) LOCAL_SPRINTF( value, "%.5f", 1.0f/getInverseHydrogenMass() );
message << _getLine( tab, "H-mass:", value );
message << _getLine( tab, "Log:", (getLog() ? Set : NotSet) ); message << _getLine( tab, "Log:", (getLog() ? Set : NotSet) );
message << _getLine( tab, "ParticleIndices:", (getShakeParticleIndicesStream() ? Set : NotSet) ); message << _getLine( tab, "ParticleIndices:", (getShakeParticleIndicesStream() ? Set : NotSet) );
......
...@@ -114,14 +114,6 @@ class BrookShakeAlgorithm : public BrookCommon { ...@@ -114,14 +114,6 @@ class BrookShakeAlgorithm : public BrookCommon {
int setShakeTolerance( BrookOpenMMFloat tolerance ); int setShakeTolerance( BrookOpenMMFloat tolerance );
/**
* Get inverse of hydrogen mass
*
* @return inverse of hydrogen mass
*/
BrookOpenMMFloat getInverseHydrogenMass( void ) const;
/** /**
* Get Shake particle stream width * Get Shake particle stream width
* *
...@@ -291,10 +283,6 @@ class BrookShakeAlgorithm : public BrookCommon { ...@@ -291,10 +283,6 @@ class BrookShakeAlgorithm : public BrookCommon {
int _maxIterations; int _maxIterations;
// inverse of H mass
BrookOpenMMFloat _inverseHydrogenMass;
// particle stream dimensions // particle stream dimensions
int _shakeParticleStreamWidth; int _shakeParticleStreamWidth;
...@@ -361,6 +349,7 @@ class BrookShakeAlgorithm : public BrookCommon { ...@@ -361,6 +349,7 @@ class BrookShakeAlgorithm : public BrookCommon {
* @param masses masses * @param masses masses
* @param constraintIndices constraint particle indices * @param constraintIndices constraint particle indices
* @param constraintLengths constraint lengths * @param constraintLengths constraint lengths
* @param platform platform reference
* *
* @return ErrorReturnValue if error * @return ErrorReturnValue if error
* *
...@@ -369,7 +358,7 @@ class BrookShakeAlgorithm : public BrookCommon { ...@@ -369,7 +358,7 @@ class BrookShakeAlgorithm : public BrookCommon {
*/ */
int _setShakeStreams( const std::vector<double>& masses, const std::vector< std::vector<int> >& constraintIndices, int _setShakeStreams( const std::vector<double>& masses, const std::vector< std::vector<int> >& constraintIndices,
const std::vector<double>& constraintLengths ); const std::vector<double>& constraintLengths, const Platform& platform );
}; };
......
...@@ -38,10 +38,6 @@ ...@@ -38,10 +38,6 @@
#include "gpu/kupdatemd.h" #include "gpu/kupdatemd.h"
#include "gpu/kcommon.h" #include "gpu/kcommon.h"
// use random number generator
#include "../../reference/src/SimTKUtilities/SimTKOpenMMUtilities.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -162,7 +158,7 @@ int BrookVerletDynamics::updateParameters( double stepSize ){ ...@@ -162,7 +158,7 @@ int BrookVerletDynamics::updateParameters( double stepSize ){
return DefaultReturnValue; return DefaultReturnValue;
}; }
/** /**
* Update * Update
...@@ -182,8 +178,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -182,8 +178,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
static const char* methodName = "\nBrookVerletDynamics::update"; static std::string methodName = "\nBrookVerletDynamics::update";
static const int PrintOn = 1; static const int PrintOn = 0;
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -193,9 +189,6 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -193,9 +189,6 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
static int showAux = 1; static int showAux = 1;
(void) fprintf( getLog(), "%s shake=%d\n", methodName, brookShakeAlgorithm.getNumberOfConstraints() );
(void) fflush( getLog() );
if( showAux ){ if( showAux ){
showAux = 0; showAux = 0;
...@@ -203,15 +196,13 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -203,15 +196,13 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
std::string contents = _brookVelocityCenterOfMassRemoval->getContentsString( ); std::string contents = _brookVelocityCenterOfMassRemoval->getContentsString( );
(void) fprintf( getLog(), "%s VelocityCenterOfMassRemoval contents\n%s", methodName, contents.c_str() ); (void) fprintf( getLog(), "%s VelocityCenterOfMassRemoval contents\n%s", methodName, contents.c_str() );
*/ */
std::string contents = brookShakeAlgorithm.getContentsString( ); (void) fprintf( getLog(), "%s Shake contents\n%s", methodName, brookShakeAlgorithm.getContentsString().c_str() );
(void) fprintf( getLog(), "%s Shake contents\n%s", methodName, contents.c_str() );
(void) fflush( getLog() ); (void) fflush( getLog() );
} }
} }
// Shake step // To Shake or not to Shake
if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){ if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){
...@@ -228,8 +219,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -228,8 +219,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
// diagnostics // diagnostics
if( PrintOn && getLog() ){ if( PrintOn && getLog() ){
(void) fprintf( getLog(), "\nPost kupdate_md_verlet: particleStrW=%3d step=%.5f", (void) fprintf( getLog(), "\n%s Post kupdate_md_verlet: particleStrW=%3d step=%.5f",
getVerletDynamicsParticleStreamWidth(), getStepSize() ); methodName.c_str(), getVerletDynamicsParticleStreamWidth(), getStepSize() );
(void) fprintf( getLog(), "\nInverseMassStream\n" ); (void) fprintf( getLog(), "\nInverseMassStream\n" );
getInverseMassStream()->printToFile( getLog() ); getInverseMassStream()->printToFile( getLog() );
...@@ -253,7 +244,6 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -253,7 +244,6 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
kshakeh_fix1( kshakeh_fix1(
(float) brookShakeAlgorithm.getMaxIterations(), (float) brookShakeAlgorithm.getMaxIterations(),
(float) getVerletDynamicsParticleStreamWidth(), (float) getVerletDynamicsParticleStreamWidth(),
brookShakeAlgorithm.getInverseHydrogenMass(),
brookShakeAlgorithm.getShakeTolerance(), brookShakeAlgorithm.getShakeTolerance(),
brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(), brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(),
positionStream.getBrookStream(), positionStream.getBrookStream(),
...@@ -266,8 +256,7 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -266,8 +256,7 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
if( (1|| PrintOn) && getLog() ){ if( (1|| PrintOn) && getLog() ){
(void) fprintf( getLog(), "\nPost kshakeh_fix1: particleStrW=%3d invMass_H=%.5f", (void) fprintf( getLog(), "\n%s Post kshakeh_fix1: particleStrW=%3d", methodName.c_str(), getVerletDynamicsParticleStreamWidth() );
getVerletDynamicsParticleStreamWidth(), brookShakeAlgorithm.getInverseHydrogenMass() );
(void) fprintf( getLog(), "\nShakeParticleIndicesStream\n" ); (void) fprintf( getLog(), "\nShakeParticleIndicesStream\n" );
brookShakeAlgorithm.getShakeParticleIndicesStream()->printToFile( getLog() ); brookShakeAlgorithm.getShakeParticleIndicesStream()->printToFile( getLog() );
...@@ -311,8 +300,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -311,8 +300,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
if( ( 1 || PrintOn) && getLog() ){ if( ( 1 || PrintOn) && getLog() ){
(void) fprintf( getLog(), "\nPost kshakeh_update2_fix1: particleStrW=%3d", (void) fprintf( getLog(), "\n%s Post kshakeh_update2_fix1: particleStrW=%3d",
getVerletDynamicsParticleStreamWidth() ); methodName.c_str(), getVerletDynamicsParticleStreamWidth() );
(void) fprintf( getLog(), "\nShakeInverseMapStream\n" ); (void) fprintf( getLog(), "\nShakeInverseMapStream\n" );
brookShakeAlgorithm.getShakeInverseMapStream()->printToFile( getLog() ); brookShakeAlgorithm.getShakeInverseMapStream()->printToFile( getLog() );
...@@ -339,15 +328,33 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp ...@@ -339,15 +328,33 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
} }
// second integration step // second integration step
kupdate_md2( (float) getStepSize(),
float inverseStepSize = 1.0f/getStepSize();
kupdate_md2( inverseStepSize,
getXPrimeStream()->getBrookStream(), getXPrimeStream()->getBrookStream(),
positionStream.getBrookStream(), positionStream.getBrookStream(),
velocityStream.getBrookStream(), velocityStream.getBrookStream(),
positionStream.getBrookStream() ); positionStream.getBrookStream() );
if( ( 1 || PrintOn) && getLog() ){
(void) fprintf( getLog(), "\n%s Post kupdate_md2: inverseStepSize=%3e",
methodName.c_str(), inverseStepSize );
BrookStreamInternal* brookStreamInternalPos = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalPos->printToFile( getLog() );
(void) fprintf( getLog(), "\nXPrimeStream\n" );
getXPrimeStream()->printToFile( getLog() );
brookStreamInternalPos = velocityStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nVelocityStream\n" );
brookStreamInternalPos->printToFile( getLog() );
}
} else { } else {
//ksetStr3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() ); kupdateMdNoShake( getStepSize(),
float inverseStepSize = 1.0f/getStepSize();
kupdateMdNoShake( inverseStepSize,
positionStream.getBrookStream(), positionStream.getBrookStream(),
velocityStream.getBrookStream(), velocityStream.getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream(),
......
...@@ -102,15 +102,6 @@ kernel void loop2Internal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jA ...@@ -102,15 +102,6 @@ kernel void loop2Internal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jA
} }
/****************************************************************
* This file is part of the gpu acceleration library for gromacs.
* Author: Mark Friedrichs
*
* This kernel was developed in collaboration with
*
* Copyright (C) Pande Group, Stanford, 2006
*****************************************************************/
kernel void loop2InternalR2( float4 r2, float4 jAtomicRadiiScaled, kernel void loop2InternalR2( float4 r2, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<>, out float4 bornSum<> ){ float bornForce, float iAtomicRadii, out float4 de<>, out float4 bornSum<> ){
...@@ -355,196 +346,6 @@ kernel void bornSumInternal( float3 d1, float3 d2, float3 d3, float3 d4, float4 ...@@ -355,196 +346,6 @@ kernel void bornSumInternal( float3 d1, float3 d2, float3 d3, float3 d4, float4
//60 //60
} }
// This kernel includes terms dL/dr & dU/dr from paper
// these values are zero, but included here to confirm that is the case
kernel void loop2InternalX( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<>, out float4 bornSum<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2, r2Inverse;
float4 l_ij, l_ij2, l_ij3;
float4 u_ij, u_ij2, u_ij3;
float4 t2,t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
float4 diff_u2_l2;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask, t2Mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
mask = r2 < smallValue4 ? zero4 : one4;
// r2 = r2 < smallValue4 ? bigValue4 : r2;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 );
rInverse = rsqrt( r2 );
r2Inverse = rInverse*rInverse;
rPlusScaledJ = r + jAtomicRadiiScaled;
rMinusScaledJ = r - jAtomicRadiiScaled;
rMinusScaledJAbs = abs( rMinusScaledJ );
// ---------------------------------------------------------------------------------------
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij;
l_ij2 = l_ij*l_ij;
u_ij = 1.0f/rPlusScaledJ;
u_ij2 = u_ij*u_ij;
diff_u2_l2 = u_ij2 - l_ij2;
scaledRadiusJ2 = jAtomicRadiiScaled*jAtomicRadiiScaled;
logRatio = log(u_ij/l_ij);
// terms associated (t2) w/ dL/dr & dU/dr are zero; included here to test that true
// loop2Internal() should be used
t3 = -0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*diff_u2_l2 + 0.25f*logRatio*r2Inverse;
l_ij3 = l_ij2*l_ij;
// t1 in Tinker code
t2 = 0.5f*l_ij2 + 0.25f*scaledRadiusJ2*l_ij3/r - 0.25f*(l_ij/r + l_ij3*r);
t2Mask = iAtomicRadii4 < rMinusScaledJ ? t2 : zero4;
t3 += t2Mask;
// t2 in Tinker code
u_ij3 = u_ij2*u_ij;
t2 = -0.5f*u_ij2 - 0.25f*scaledRadiusJ2*u_ij3/r + 0.25f*(u_ij/r + u_ij3*r);
t3 += t2;
de = mask*bornForce*t3*rInverse;
// Born radius term
// bornSum = l_ij - u_ij + 0.25*r*diff_u2_l2 + 0.5f*rInverse*logRatio - (0.25*jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
bornSum = l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
t3 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4;
bornSum += t3;
bornSum *= mask;
}
/* This is a copy of loop2Internal(), but w/ the calculation of bornSum removed */
kernel void loop2InternalNoSumX( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2, r2Inverse;
float4 l_ij, l_ij2, l_ij3;
float4 u_ij, u_ij2, u_ij3;
float4 t2, t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask, t2Mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// mask = r2 < smallValue4 ? zero4 : one4;
// r2 = r2 < smallValue4 ? bigValue4 : r2;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 );
rInverse = rsqrt( r2 );
r2Inverse = rInverse*rInverse;
rPlusScaledJ = r + jAtomicRadiiScaled;
rMinusScaledJ = r - jAtomicRadiiScaled;
rMinusScaledJAbs = abs( rMinusScaledJ );
// ---------------------------------------------------------------------------------------
// mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : one4;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij;
l_ij2 = l_ij*l_ij;
u_ij = 1.0f/rPlusScaledJ;
u_ij2 = u_ij*u_ij;
scaledRadiusJ2 = jAtomicRadiiScaled*jAtomicRadiiScaled;
logRatio = log(u_ij/l_ij);
// terms associated w/ dL/dr & dU/dr are zero and not included
t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse;
l_ij3 = l_ij2*l_ij;
// t1 in Tinker code
t2 = 0.5f*l_ij2 + 0.25f*scaledRadiusJ2*l_ij3/r - 0.25f*(l_ij/r + l_ij3*r);
t2Mask = iAtomicRadii4 < rMinusScaledJ ? t2 : zero4;
t3 += t2Mask;
// t2 in Tinker code
u_ij3 = u_ij2*u_ij;
t2 = -0.5f*u_ij2 - 0.25f*scaledRadiusJ2*u_ij3/r + 0.25f*(u_ij/r + u_ij3*r);
t3 += t2;
de = mask*bornForce*t3*rInverse;
}
/* --------------------------------------------------------------------------------------- /* ---------------------------------------------------------------------------------------
Calculate Loop 2 OBC forces (Simbios) Calculate Loop 2 OBC forces (Simbios)
...@@ -971,8 +772,8 @@ kernel float4 scalar_force_CDLJMerge( float4 qq, float epsfac, float4 sig, float ...@@ -971,8 +772,8 @@ kernel float4 scalar_force_CDLJMerge( float4 qq, float epsfac, float4 sig, float
//???? FLOPS //???? FLOPS
kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor, kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth, float4 posq[][], float streamWidth, float fstreamWidth, float3 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][], float2 scaledAtomicRadii[][],
out float4 bornForce<> ){ out float4 bornForce<> ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -980,14 +781,11 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa ...@@ -980,14 +781,11 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
// atomic radii: radius - dielectric offset // atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii; float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset ) // scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii; float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii; float4 jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce // (Born radii**2)*BornForce*ObcChainForce
...@@ -1043,46 +841,33 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa ...@@ -1043,46 +841,33 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
// etch i1 position, Born prefactor, atomic radius // fetch position, (atomic radius - dielectric offset )
i1pos = posq[ iAtom ].xyz;
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i2 position, Born prefactor, atomic radius i1pos = posq[ iAtom ];
i1AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtom.x += 1; iAtom.x += 1;
i2pos = posq[ iAtom ].xyz; i2pos = posq[ iAtom ];
i2AtomicRadii = atomicRadii[ iAtom ]; i2AtomicRadii = scaledAtomicRadii[ iAtom ].y;
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
// etch i3 position, Born prefactor, atomic radius
iAtom.x += 1; iAtom.x += 1;
i3pos = posq[ iAtom ].xyz; i3pos = posq[ iAtom ];
i3AtomicRadii = atomicRadii[ iAtom ]; i3AtomicRadii = scaledAtomicRadii[ iAtom ].y;
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
// etch i4 position, Born prefactor, atomic radius
iAtom.x += 1; iAtom.x += 1;
i4pos = posq[ iAtom ].xyz; i4pos = posq[ iAtom ];
i4AtomicRadii = atomicRadii[ iAtom ]; i4AtomicRadii = scaledAtomicRadii[ iAtom ].y;
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
// create float4 for main vars
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii ); bornForce = float4( 0.0f, 0.0f, 0.0f, 0.0f );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup // inner loop indices setup
//changed the following instruction for rounding issues on some ASICs //changed the following instruction for rounding issues on some ASICs
//whichRep = floor( forceIndex / roundedUpAtoms ); //whichRep = floor( forceIndex / roundedUpAtoms );
tmp = fmod(forceIndex, roundedUpAtoms); tmp = fmod(forceIndex, roundedUpAtoms);
whichRep = round((forceIndex - tmp)/roundedUpAtoms); whichRep = round((forceIndex - tmp)/roundedUpAtoms);
jBlock = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) ); jBlock = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) );
jStart = whichRep*jBlock; jStart = whichRep*jBlock;
...@@ -1100,30 +885,23 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa ...@@ -1100,30 +885,23 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
// gather required values // gather required values
j1AtomicRadii = atomicRadii[ jAtom ]; j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ]; j1pos = posq[ jAtom ];
j1pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f; jAtom.x += 1.0f;
j2AtomicRadii = atomicRadii[ jAtom ]; j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ]; j2pos = posq[ jAtom ];
j2pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f; jAtom.x += 1.0f;
j3AtomicRadii = atomicRadii[ jAtom ]; j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ]; j3pos = posq[ jAtom ];
j3pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f; jAtom.x += 1.0f;
j4AtomicRadii = atomicRadii[ jAtom ]; j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ]; j4pos = posq[ jAtom ];
j4pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f; jAtom.x += 1.0f;
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii ); jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -1190,1212 +968,3 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa ...@@ -1190,1212 +968,3 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
jAtom.y += 1.0f; jAtom.y += 1.0f;
} }
} }
//???? FLOPS
kernel void kCalculateBornRadiiOk( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth, float4 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][],
float bornForceFactor[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
//float4 iBornForceFactor;
//float4 jBornForceFactor;
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
float tmp;
float tmp2;
// ---------------------------------------------------------------------------------------
// constants
const float I_Unroll = 4.0f;
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// etch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ].xyz;
i1BornForceFactor = bornForceFactor[ iAtom ];
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i2 position, Born prefactor, atomic radius
iAtom.x += 1;
i2pos = posq[ iAtom ].xyz;
i2BornForceFactor = bornForceFactor[ iAtom ];
i2AtomicRadii = atomicRadii[ iAtom ];
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i3 position, Born prefactor, atomic radius
iAtom.x += 1;
i3pos = posq[ iAtom ].xyz;
i3BornForceFactor = bornForceFactor[ iAtom ];
i3AtomicRadii = atomicRadii[ iAtom ];
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i4 position, Born prefactor, atomic radius
iAtom.x += 1;
i4pos = posq[ iAtom ].xyz;
i4BornForceFactor = bornForceFactor[ iAtom ];
i4AtomicRadii = atomicRadii[ iAtom ];
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// create float4 for main vars
//iBornForceFactor = float4( i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor );
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup
//changed the following instruction for rounding issues on some ASICs
//whichRep = floor( forceIndex / roundedUpAtoms );
tmp = fmod(forceIndex, roundedUpAtoms);
whichRep = round((forceIndex - tmp)/roundedUpAtoms);
//jBlock = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
jBlock = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
j1BornForceFactor = bornForceFactor[ jAtom ];
j1AtomicRadii = atomicRadii[ jAtom ];
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j1pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
j2BornForceFactor = bornForceFactor[ jAtom ];
j2AtomicRadii = atomicRadii[ jAtom ];
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j2pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
j3BornForceFactor = bornForceFactor[ jAtom ];
j3AtomicRadii = atomicRadii[ jAtom ];
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j3pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
j4BornForceFactor = bornForceFactor[ jAtom ];
j4AtomicRadii = atomicRadii[ jAtom ];
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j4pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
//jBornForceFactor = float4( j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor );
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
d1 = j1pos - i1pos; //3
d2 = j2pos - i1pos; //3
d3 = j3pos - i1pos; //3
d4 = j4pos - i1pos; //3
// i = 1
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, bornSum ); //156 : 368
bornForce1.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 2
d1 = j1pos - i2pos; //3
d2 = j2pos - i2pos; //3
d3 = j3pos - i2pos; //3
d4 = j4pos - i2pos; //3
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, bornSum );//156 : 368
bornForce2.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 3
d1 = j1pos - i3pos; //3
d2 = j2pos - i3pos; //3
d3 = j3pos - i3pos; //3
d4 = j4pos - i3pos; //3
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, bornSum );//156 : 368
bornForce3.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 4
d1 = j1pos - i4pos; //3
d2 = j2pos - i4pos; //3
d3 = j3pos - i4pos; //3
d4 = j4pos - i4pos; //3
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, bornSum );//156 : 366
bornForce4.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
/* ---------------------------------------------------------------------------------------
Calculate Loop 2 OBC forces (Simbios)
numberOfAtoms: no. of atoms
roundedUpAtoms: rounded up number of atoms based on unroll
duplicationFactor: ?
strheight: atom stream height
streamWidth: atom stream width
fstreamWidth: force stream width (output -- i-unroll)
posq: atom positions and charge
atomicRadii: atomic radii
scaledAtomicRadii: scaled atomic radii
bornForceFactor: (Born radii)**2*bornForce*Obcforce
bornForce1: i-unroll first force component, including dBorn_r/dr in .w
bornForce2: i-unroll second force component, including dBorn_r/dr in .w
bornForce3: i-unroll third force component, including dBorn_r/dr in .w
bornForce4: i-unroll fourth force component, including dBorn_r/dr in .w
#endif
--------------------------------------------------------------------------------------- */
kernel void kObcLoop2Cdlj4( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth,
float jstreamWidth, float epsfac, float4 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][],
float bornForceFactor[][], float4 isigeps[][],
float4 sigma[][], float4 epsilon[][], float excl[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
//float4 iBornForceFactor;
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
float i1q, i2q, i3q, i4q;
float i1sig, i2sig, i3sig, i4sig;
float i1eps, i2eps, i3eps, i4eps;
float j1q, j2q, j3q, j4q;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2, r2Masked;
float3 pad;
float4 exclmask;
float2 iatom;
float2 jstrindex;
float linind;
float4 jsig, jeps;
float4 jQ,qq;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, fs, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
// ---------------------------------------------------------------------------------------
// constants
const float I_Unroll = 4.0f;
const float4 exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// fetch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ].xyz;
i1q = posq[ iAtom ].w;
i1BornForceFactor = bornForceFactor[ iAtom ];
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i1sig = jstrindex.x;
i1eps = jstrindex.y;
bornForce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i2 position, Born prefactor, atomic radius
iAtom.x += 1;
i2pos = posq[ iAtom ].xyz;
i2q = posq[ iAtom ].w;
i2BornForceFactor = bornForceFactor[ iAtom ];
i2AtomicRadii = atomicRadii[ iAtom ];
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i2sig = jstrindex.x;
i2eps = jstrindex.y;
bornForce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i3 position, Born prefactor, atomic radius
iAtom.x += 1;
i3pos = posq[ iAtom ].xyz;
i3q = posq[ iAtom ].w;
i3BornForceFactor = bornForceFactor[ iAtom ];
i3AtomicRadii = atomicRadii[ iAtom ];
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i3sig = jstrindex.x;
i3eps = jstrindex.y;
bornForce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i4 position, Born prefactor, atomic radius
iAtom.x += 1;
i4pos = posq[ iAtom ].xyz;
i4q = posq[ iAtom ].w;
i4BornForceFactor = bornForceFactor[ iAtom ];
i4AtomicRadii = atomicRadii[ iAtom ];
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i4sig = jstrindex.x;
i4eps = jstrindex.y;
bornForce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// create float4 for main vars
//iBornForceFactor = float4( i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor );
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup
whichRep = floor( forceIndex / roundedUpAtoms );
jBlock = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
exclind.x = iAtomLinearIndex;
exclind.y = jStart*streamWidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jAtom.x + jAtom.y*streamWidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstreamWidth))/jstreamWidth);
jstrindex.x = linind - jstrindex.y*jstreamWidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
j1BornForceFactor = bornForceFactor[ jAtom ];
j1AtomicRadii = atomicRadii[ jAtom ];
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j1pos = posq[ jAtom ].xyz;
j1q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j2BornForceFactor = bornForceFactor[ jAtom ];
j2AtomicRadii = atomicRadii[ jAtom ];
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j2pos = posq[ jAtom ].xyz;
j2q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j3BornForceFactor = bornForceFactor[ jAtom ];
j3AtomicRadii = atomicRadii[ jAtom ];
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j3pos = posq[ jAtom ].xyz;
j3q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j4BornForceFactor = bornForceFactor[ jAtom ];
j4AtomicRadii = atomicRadii[ jAtom ];
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j4pos = posq[ jAtom ].xyz;
j4q = posq[ jAtom ].w;
jAtom.x += 1.0f;
//jBornForceFactor = float4( j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor );
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
jQ = float4( j1q, j2q, j3q, j4q );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
sig = i1sig + jsig;
eps = i1eps * jeps;
qq = i1q*jQ;
d1 = j1pos - i1pos;
d2 = j2pos - i1pos;
d3 = j3pos - i1pos;
d4 = j4pos - i1pos;
// i = 1
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce1.xyz += de.x*d1;
bornForce1.xyz += de.y*d2;
bornForce1.xyz += de.z*d3;
bornForce1.xyz += de.w*d4;
bornForce1.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// i = 2
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = i2sig + jsig;
eps = i2eps * jeps;
qq = i2q*jQ;
d1 = j1pos - i2pos;
d2 = j2pos - i2pos;
d3 = j3pos - i2pos;
d4 = j4pos - i2pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce2.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce2.xyz += de.z*d3;
bornForce2.xyz += de.w*d4;
bornForce2.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// i = 3
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = i3sig + jsig;
eps = i3eps * jeps;
qq = i3q*jQ;
d1 = j1pos - i3pos;
d2 = j2pos - i3pos;
d3 = j3pos - i3pos;
d4 = j4pos - i3pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce3.xyz += de.x*d1;
bornForce3.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce3.xyz += de.w*d4;
bornForce3.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// i = 4
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = i4sig + jsig;
eps = i4eps * jeps;
qq = i4q*jQ;
d1 = j1pos - i4pos;
d2 = j2pos - i4pos;
d3 = j3pos - i4pos;
d4 = j4pos - i4pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce4.xyz += de.x*d1;
bornForce4.xyz += de.y*d2;
bornForce4.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
bornForce4.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// j = 1
d1 = j1pos - i1pos;
d2 = j1pos - i2pos;
d3 = j1pos - i3pos;
d4 = j1pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 2
d1 = j2pos - i1pos;
d2 = j2pos - i2pos;
d3 = j2pos - i3pos;
d4 = j2pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 3
d1 = j3pos - i1pos;
d2 = j3pos - i2pos;
d3 = j3pos - i3pos;
d4 = j3pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 4
d1 = j4pos - i1pos;
d2 = j4pos - i2pos;
d3 = j4pos - i3pos;
d4 = j4pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
/* ---------------------------------------------------------------------------------------
Calculate Loop 2 OBC forces (Simbios)
numberOfAtoms: no. of atoms
roundedUpAtoms: rounded up number of atoms based on unroll
duplicationFactor: ?
strheight: atom stream height
streamWidth: atom stream width
fstreamWidth: force stream width (output -- i-unroll)
posq: atom positions and charge
atomicRadii: atomic radii
scaledAtomicRadii: scaled atomic radii
bornForceFactor: (Born radii)**2*bornForce*Obcforce
bornForce1: i-unroll first force component, including dBorn_r/dr in .w
bornForce2: i-unroll second force component, including dBorn_r/dr in .w
bornForce3: i-unroll third force component, including dBorn_r/dr in .w
bornForce4: i-unroll fourth force component, including dBorn_r/dr in .w
#endif
--------------------------------------------------------------------------------------- */
kernel void kObcLoop2Cdlj4X( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth,
float jstreamWidth, float epsfac, float4 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][],
float bornForceFactor[][], float4 isigeps[][],
float4 sigma[][], float4 epsilon[][], float excl[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
//float4 iBornForceFactor;
//float4 jBornForceFactor;
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
float i1q, i2q, i3q, i4q;
float i1sig, i2sig, i3sig, i4sig;
float i1eps, i2eps, i3eps, i4eps;
float j1q, j2q, j3q, j4q;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2, r2Masked;
float3 pad;
float4 exclmask;
float2 iatom;
float2 jstrindex;
float linind;
float4 jsig, jeps;
float4 jQ,qq;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, fs, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
float sum;
// ---------------------------------------------------------------------------------------
// constants
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 frac4 = float4( 0.2f, 0.2f, 0.2f, 0.2f );
const float I_Unroll = 4.0f;
const float4 exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// fetch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ].xyz;
i1q = posq[ iAtom ].w;
i1BornForceFactor = bornForceFactor[ iAtom ];
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i1sig = jstrindex.x;
i1eps = jstrindex.y;
bornForce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i2 position, Born prefactor, atomic radius
iAtom.x += 1;
i2pos = posq[ iAtom ].xyz;
i2q = posq[ iAtom ].w;
i2BornForceFactor = bornForceFactor[ iAtom ];
i2AtomicRadii = atomicRadii[ iAtom ];
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i2sig = jstrindex.x;
i2eps = jstrindex.y;
bornForce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i3 position, Born prefactor, atomic radius
iAtom.x += 1;
i3pos = posq[ iAtom ].xyz;
i3q = posq[ iAtom ].w;
i3BornForceFactor = bornForceFactor[ iAtom ];
i3AtomicRadii = atomicRadii[ iAtom ];
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i3sig = jstrindex.x;
i3eps = jstrindex.y;
bornForce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i4 position, Born prefactor, atomic radius
iAtom.x += 1;
i4pos = posq[ iAtom ].xyz;
i4q = posq[ iAtom ].w;
i4BornForceFactor = bornForceFactor[ iAtom ];
i4AtomicRadii = atomicRadii[ iAtom ];
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i4sig = jstrindex.x;
i4eps = jstrindex.y;
bornForce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// create float4 for main vars
//iBornForceFactor = float4( i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor );
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup
whichRep = floor( forceIndex / roundedUpAtoms );
jBlock = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
exclind.x = iAtomLinearIndex;
exclind.y = jStart*streamWidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jAtom.x + jAtom.y*streamWidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstreamWidth))/jstreamWidth);
jstrindex.x = linind - jstrindex.y*jstreamWidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
j1BornForceFactor = bornForceFactor[ jAtom ];
j1AtomicRadii = atomicRadii[ jAtom ];
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j1pos = posq[ jAtom ].xyz;
j1q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j2BornForceFactor = bornForceFactor[ jAtom ];
j2AtomicRadii = atomicRadii[ jAtom ];
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j2pos = posq[ jAtom ].xyz;
j2q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j3BornForceFactor = bornForceFactor[ jAtom ];
j3AtomicRadii = atomicRadii[ jAtom ];
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j3pos = posq[ jAtom ].xyz;
j3q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j4BornForceFactor = bornForceFactor[ jAtom ];
j4AtomicRadii = atomicRadii[ jAtom ];
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j4pos = posq[ jAtom ].xyz;
j4q = posq[ jAtom ].w;
jAtom.x += 1.0f;
//jBornForceFactor = float4( j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor );
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
jQ = float4( j1q, j2q, j3q, j4q );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
/*
sig = i1sig + jsig;
eps = i1eps * jeps;
qq = i1q*jQ;
d1 = j1pos - i1pos;
d2 = j2pos - i1pos;
d3 = j3pos - i1pos;
d4 = j4pos - i1pos;
// i = 1
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce1.xyz += de.x*d1;
bornForce1.xyz += de.y*d2;
bornForce1.xyz += de.z*d3;
bornForce1.xyz += de.w*d4;
bornForce1.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x ) + floor( exclmask.y ) + floor( exclmask.z ) + floor( exclmask.w );
if( sum > 0.25f ){
//if( exclind.y < 4.9f && exclind.y > 3.1f ){
//if( exclind.y < 0.9f ){
//if( exclind.y < 1.9f && exclind.y > 0.1f ){
//if( exclind.y < 2.9f && exclind.y > 1.1f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce1.x += 1.0f;
bornForce1.y += exclusions;
bornForce1.z += sum;
bornForce1.w += exclmask.w;
//bornForce1 = exclmask;
}
// ---------------------------------------------------------------------------------------
// i = 2
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce2.x += 1.0f;
bornForce2.y += exclusions;
bornForce2.z += sum;
bornForce2.w += exclind.y;
}
/*
sig = i2sig + jsig;
eps = i2eps * jeps;
qq = i2q*jQ;
d1 = j1pos - i2pos;
d2 = j2pos - i2pos;
d3 = j3pos - i2pos;
d4 = j4pos - i2pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce2.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce2.xyz += de.z*d3;
bornForce2.xyz += de.w*d4;
bornForce2.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
// ---------------------------------------------------------------------------------------
// i = 3
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce3.x += 1.0f;
bornForce3.y += exclusions;
bornForce3.z += sum;
bornForce3.w += exclind.y;
}
/*
sig = i3sig + jsig;
eps = i3eps * jeps;
qq = i3q*jQ;
d1 = j1pos - i3pos;
d2 = j2pos - i3pos;
d3 = j3pos - i3pos;
d4 = j4pos - i3pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce3.xyz += de.x*d1;
bornForce3.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce3.xyz += de.w*d4;
bornForce3.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
// ---------------------------------------------------------------------------------------
// i = 4
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce4.x += 1.0f;
bornForce4.y += exclusions;
bornForce4.z += sum;
bornForce4.w += exclind.y;
}
/*
sig = i4sig + jsig;
eps = i4eps * jeps;
qq = i4q*jQ;
d1 = j1pos - i4pos;
d2 = j2pos - i4pos;
d3 = j3pos - i4pos;
d4 = j4pos - i4pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce4.xyz += de.x*d1;
bornForce4.xyz += de.y*d2;
bornForce4.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
bornForce4.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
// ---------------------------------------------------------------------------------------
/*
// j = 1
d1 = j1pos - i1pos;
d2 = j1pos - i2pos;
d3 = j1pos - i3pos;
d4 = j1pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 2
d1 = j2pos - i1pos;
d2 = j2pos - i2pos;
d3 = j2pos - i3pos;
d4 = j2pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 3
d1 = j3pos - i1pos;
d2 = j3pos - i2pos;
d3 = j3pos - i3pos;
d4 = j3pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 4
d1 = j4pos - i1pos;
d2 = j4pos - i2pos;
d3 = j4pos - i3pos;
d4 = j4pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
*/
// ---------------------------------------------------------------------------------------
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
...@@ -502,7 +502,6 @@ void kCalculateBornRadii( ...@@ -502,7 +502,6 @@ void kCalculateBornRadii(
float streamWidth, float streamWidth,
float fstreamWidth, float fstreamWidth,
::brook::stream posq, ::brook::stream posq,
::brook::stream atomicRadii,
::brook::stream scaledAtomicRadii, ::brook::stream scaledAtomicRadii,
::brook::stream bornForce1 ::brook::stream bornForce1
); );
......
...@@ -462,75 +462,6 @@ kernel void kAddAndMergeFloat4( ...@@ -462,75 +462,6 @@ kernel void kAddAndMergeFloat4(
} }
} }
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
/*
kernel void kAddAndMergeFloat4_4(
float repfac,
float atomStrWidth,
float pstreamStrWidth,
float natoms,
float iUnroll,
float4 inStream<>,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
out float4 outstream<> )
{
float linind;
float2 pindex;
float odd;
float i;
float floor_linind_iUnroll;
linind = (indexof outstream).x + (indexof outstream).y * atomStrWidth;
//If odd or even, we pick from diferent streams.
//odd = linind - floor( linind / iUnroll ) * iUnroll;
//Now linear index is the index into partial_streams
//linind = floor( linind / iUnroll );
floor_linind_iUnroll = round( (linind - fmod(linind, iUnroll))/iUnroll );
odd = linind - floor_linind_iUnroll * iUnroll;//bixia modify
linind = floor_linind_iUnroll; //bixia modify
outstream = inStream;
outstream.w = 0.0f;
// outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
//If we have predicated conditionals, we should
//keep the conditional inside the loop
for ( i = 0.0f; i < repfac; i+= 1.0f ) {
//pindex.y = floor( linind / pstreamStrWidth );
pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth ); //bixia modify
pindex.x = linind - pindex.y * pstreamStrWidth;
if ( odd < 0.5f ) { //is odd
outstream += pstream1[ pindex ];
} else if( odd < 1.5f ){
outstream += pstream2[ pindex ];
} else if( odd < 2.5f ){
outstream += pstream3[ pindex ];
} else {
outstream += pstream4[ pindex ];
}
linind += natoms/iUnroll;
}
} */
kernel void kAddAndMergeFloat4_4( kernel void kAddAndMergeFloat4_4(
float repfac, float repfac,
float atomStreamWidth, float atomStreamWidth,
...@@ -939,158 +870,6 @@ kernel void kPostCalculateBornRadii_nobranch( ...@@ -939,158 +870,6 @@ kernel void kPostCalculateBornRadii_nobranch(
} }
kernel void kPostCalculateBornRadii_nobranchOk(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float conversion,
float mergeNonObcForces,
float4 inObcForces<>,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
float atomicRadii<>,
out float bornRadii<>,
out float obcChain<> ){
// ---------------------------------------------------------------------------------------
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
float sum2, sum3, bornSum, tanhSum, atomicRadiiOffset, obcIntermediate;
float4 o1,o2,o3,o4;
float4 tmp;
float2 iAtom;
float4 forces;
float expPlus, expMinus;
// ---------------------------------------------------------------------------------------
// constants -- OBC Type II
const float alphaObc = 1.0f;
const float betaObc = 0.8f;
const float gammaObc = 4.85f;
const float dielectricOffset = 0.009f;
// ---------------------------------------------------------------------------------------
// given atom index find force indices and streams
pindex = indexof( obcChain );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
// add current forces in inStream to forces stored in pstreams
// the .w entry is Born sum values; it will be used to calculate the
// Born radii and obcChain term
forces = inObcForces;
forces.w = 0.0f;
//forces = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// sum over j-loop 'duplications' by gathering from pstreams
for( i = 0.0f; i < repfac; i += 1.0f ){
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
pindex.x = qIndex - pindex.y*pStreamWidth;
o1 = pstream1[ pindex ];
o2 = pstream2[ pindex ];
o3 = pstream3[ pindex ];
o4 = pstream4[ pindex ];
tmp = qOff < 0.5f ? o1 : o2;
tmp = qOff < 1.5f ? tmp : o3;
tmp = qOff < 2.5f ? tmp : o4;
forces += tmp;
forceIndex += roundNatoms;
}
// compute Born radii and ObcChain
atomicRadiiOffset = atomicRadii - dielectricOffset;
bornSum = forces.w;
bornSum *= 0.5f*atomicRadiiOffset;
sum2 = bornSum*bornSum;
sum3 = bornSum*sum2;
// Tanh does not exist?
// calculate [ exp(x) - exp(-x) ]/[ exp(x) + exp(-x) ]
// tanhSum = tanh( bornSum - betaObc*sum2 + gammaObc*sum3 );
tanhSum = bornSum - betaObc*sum2 + gammaObc*sum3;
expPlus = exp( tanhSum );
expMinus = 1.0f/expPlus;
tanhSum = ( expPlus - expMinus )/( expPlus + expMinus );
bornRadii = 1.0f/( (1.0f/(atomicRadiiOffset)) - tanhSum/atomicRadii );
obcIntermediate = atomicRadiiOffset*( alphaObc - 2.0f*betaObc*bornSum + 3.0f*gammaObc*sum2 );
obcChain = (1.0f - tanhSum*tanhSum)*obcIntermediate/atomicRadii;
if( atomIndex >= natoms ){
bornRadii = 0.0f;
obcChain = 0.0f;
}
}
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel void kPreGbsaForce2(
float4 intermediateForceIn<>,
float bornRadii<>,
out float bornRadii2Force<> ){
// ---------------------------------------------------------------------------------------
// float P4 = 15.236f;
// P4_ec = P4/electricConstant
const float P4_ec = -0.09176f;
bornRadii2Force = P4_ec*bornRadii*bornRadii*intermediateForceIn.w;
}
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel void kPreObcForce2(
float4 intermediateForceIn<>,
float4 obcChain<>,
float bornRadii<>,
out float bornRadii2Force<> ){
// ---------------------------------------------------------------------------------------
bornRadii2Force = obcChain*bornRadii*bornRadii*intermediateForceIn.w;
}
/* Add forces from two streams */ /* Add forces from two streams */
kernel void kAddForces3_4( float conversion, float3 force1<>, float4 force2<>, out float3 outForce<> ){ kernel void kAddForces3_4( float conversion, float3 force1<>, float4 force2<>, out float3 outForce<> ){
...@@ -1114,30 +893,3 @@ kernel void kCopyFloat3To4( ...@@ -1114,30 +893,3 @@ kernel void kCopyFloat3To4(
outForce.xyz = inForce; outForce.xyz = inForce;
outForce.w = 0.0f; outForce.w = 0.0f;
} }
/* ---------------------------------------------------------------------------------------
Calculate Born radius from bonded and nonbonded Gpol
gpolNonBonded value is in gpolNonBonded.w
--------------------------------------------------------------------------------------- */
kernel void kBornRadii( float4 gpolNonBonded<>, float gpolFixed<>, out float bornRadius<> ){
// ---------------------------------------------------------------------------------------
// constants
const float electricConstant = -166.02691f;
// 0.25*P4
const float P4_25 = 3.81575f;
// ---------------------------------------------------------------------------------------
bornRadius = gpolFixed + P4_25*gpolNonBonded.w;
bornRadius = electricConstant/bornRadius;
// ---------------------------------------------------------------------------------------
}
...@@ -52,12 +52,11 @@ kernel void ...@@ -52,12 +52,11 @@ kernel void
kshakeh_fix1( kshakeh_fix1(
float maxIterations, //number of iterations float maxIterations, //number of iterations
float strwidth, //stream width of posq float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float inputTolerance, //tolerance float inputTolerance, //tolerance
float4 atoms<>, //heavy0, h1, h2, h3 float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update float3 posq[][], //positions before update
float3 posqp[][], //changes to positions float3 posqp[][], //changes to positions
float4 params<>, // (1/m0, mu/2, blensq, 0.0f ) float4 params<>, // (1/m0, mu/2, blensq, 1/m1 )
out float3 cposq0<>, //constrained position for heavy atom out float3 cposq0<>, //constrained position for heavy atom
out float3 cposq1<>, //ditto for h1 out float3 cposq1<>, //ditto for h1
out float3 cposq2<>, //ditto for h2 out float3 cposq2<>, //ditto for h2
...@@ -88,22 +87,20 @@ kshakeh_fix1( ...@@ -88,22 +87,20 @@ kshakeh_fix1(
//or nans/infs in the calcs. //or nans/infs in the calcs.
if( atoms.z > -0.5f ){ if( atoms.z > -0.5f ){
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth ); aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth; aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f; mask2 = 1.0f;
} else { } else {
aj2 = aj1; aj2 = aj1;
mask2 = 0.0f; mask2 = 0.0f;
} }
if( atoms.w > -0.5f ){ if( atoms.w > -0.5f ){
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth ); aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth; aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f; mask3 = 1.0f;
} else { } else {
aj3 = aj1; aj3 = aj1;
mask3 = 0.0f; mask3 = 0.0f;
} }
...@@ -129,10 +126,10 @@ kshakeh_fix1( ...@@ -129,10 +126,10 @@ kshakeh_fix1(
ld2 = params.z - rij2sq; ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq; ld3 = params.z - rij3sq;
xpi = posqp[ai] - xi; xpi = posqp[ai];
xpj1 = posqp[aj1] - xj1; xpj1 = posqp[aj1];
xpj2 = posqp[aj2] - xj2; xpj2 = posqp[aj2];
xpj3 = posqp[aj3] - xj3; xpj3 = posqp[aj3];
i = 0.0f; i = 0.0f;
...@@ -152,7 +149,7 @@ kshakeh_fix1( ...@@ -152,7 +149,7 @@ kshakeh_fix1(
dr = rij1 * acor; dr = rij1 * acor;
xpi += dr * params.x; xpi += dr * params.x;
xpj1 -= dr * invmH; xpj1 -= dr * params.w;
//Second hydrogen //Second hydrogen
...@@ -167,7 +164,7 @@ kshakeh_fix1( ...@@ -167,7 +164,7 @@ kshakeh_fix1(
dr = rij2 * acor; dr = rij2 * acor;
xpi += dr * params.x; xpi += dr * params.x;
xpj2 -= dr * invmH; xpj2 -= dr * params.w;
//Third hydrogen //Third hydrogen
...@@ -181,7 +178,7 @@ kshakeh_fix1( ...@@ -181,7 +178,7 @@ kshakeh_fix1(
dr = rij3 * acor; dr = rij3 * acor;
xpi += dr * params.x; xpi += dr * params.x;
xpj3 -= dr * invmH; xpj3 -= dr * params.w;
i += 1.0f; i += 1.0f;
} }
...@@ -195,7 +192,7 @@ kshakeh_fix1( ...@@ -195,7 +192,7 @@ kshakeh_fix1(
} }
kernel void kshakeh_update2_fix1( kernel void kshakeh_update1_fix1(
float strwidth, //width of cposq streams float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions float3 posq<>, //old positions
...@@ -208,26 +205,25 @@ kernel void kshakeh_update2_fix1( ...@@ -208,26 +205,25 @@ kernel void kshakeh_update2_fix1(
){ ){
float2 atom; float2 atom;
// atom.y = floor( invmap.x / strwidth );
atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth ); atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
atom.x = invmap.x - atom.y * strwidth; atom.x = invmap.x - atom.y * strwidth;
// oposq = posq;
if ( invmap.y < 0 ){ if ( invmap.y < 0 ){
oposq = posqp - posq; oposq = posqp;
} else if ( invmap.y < 0.5f ){ } else if ( invmap.y < 0.5f ){
oposq = cposq0[ atom ]; oposq += cposq0[ atom ];
} else if ( invmap.y < 1.5f){ } else if ( invmap.y < 1.5f ){
oposq = cposq1[ atom ]; oposq += cposq1[ atom ];
} else if ( invmap.y < 2.5f ){ } else if ( invmap.y < 2.5f ){
oposq = cposq2[ atom ]; oposq += cposq2[ atom ];
} else if ( invmap.y < 3.5f ){ } else if ( invmap.y < 3.5f ){
oposq = cposq3[ atom ]; oposq += cposq3[ atom ];
} }
oposq += posq;
} }
kernel void kshakeh_update1_fix1( kernel void kshakeh_update2_fix1(
float strwidth, //width of cposq streams float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions float3 posq<>, //old positions
...@@ -238,23 +234,23 @@ kernel void kshakeh_update1_fix1( ...@@ -238,23 +234,23 @@ kernel void kshakeh_update1_fix1(
float3 cposq3[][], //ditto for h3 float3 cposq3[][], //ditto for h3
out float3 oposq<> //updated deltas out float3 oposq<> //updated deltas
){ ){
float2 atom; float2 atom;
// atom.y = floor( invmap.x / strwidth );
atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth ); atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
atom.x = invmap.x - atom.y * strwidth; atom.x = invmap.x - atom.y * strwidth;
oposq = posq;
if ( invmap.y < 0 ){ if ( invmap.y < 0 ){
oposq = posqp; oposq = posqp;
} else if ( invmap.y < 0.5f ){ } else if ( invmap.y < 0.5f ){
oposq += cposq0[ atom ]; oposq = cposq0[ atom ];
} else if ( invmap.y < 1.5f ){ } else if ( invmap.y < 1.5f){
oposq += cposq1[ atom ]; oposq = cposq1[ atom ];
} else if ( invmap.y < 2.5f ){ } else if ( invmap.y < 2.5f ){
oposq += cposq2[ atom ]; oposq = cposq2[ atom ];
} else if ( invmap.y < 3.5f ){ } else if ( invmap.y < 3.5f ){
oposq += cposq3[ atom ]; oposq = cposq3[ atom ];
} }
oposq += posq;
} }
...@@ -32,7 +32,6 @@ ...@@ -32,7 +32,6 @@
void kshakeh_fix1 ( void kshakeh_fix1 (
const float maxIterations, const float maxIterations,
const float strwidth, const float strwidth,
const float invmH,
const float tolerance, const float tolerance,
::brook::stream atoms, ::brook::stream atoms,
::brook::stream posq, ::brook::stream posq,
......
/* -------------------------------------------------------------------------- * /* -------------------------------------------------------------------------- *
* OpenMM * * OpenMM *
* -------------------------------------------------------------------------- * * -------------------------------------------------------------------------- *
...@@ -31,6 +30,20 @@ ...@@ -31,6 +30,20 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. * * USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
//Applies a permutation to the given random vector stream
//Totally bandwidth bound and streams are large, should be done infrequently.
//Probably don't want gvin and gvout to be the same stream!
kernel void kpermute_vectors( float gstrwidth, float perm<>,
float3 gvin[][], out float3 gvout<> ){
float2 ind;
//ind.y = floor( perm / gstrwidth );
ind.y = round( (perm - fmod( perm, gstrwidth ))/gstrwidth );
ind.x = perm - ind.y * gstrwidth;
gvout = gvin[ ind ];
}
/* Stochastic Integrator /* Stochastic Integrator
* *
* Since we don't have random numbers on the GPU, we * Since we don't have random numbers on the GPU, we
...@@ -56,139 +69,6 @@ ...@@ -56,139 +69,6 @@
* constants that use it. * constants that use it.
* */ * */
/*
kernel void kupdate_sd1(
float xstrwidth, //atom stream width
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
//sd constants, prefixed with c
//to avoid 1 letter names, cxx = sdc[0].xx
float cem,
//sd precalculated constants
float pc1, //tau_t[0] * ( 1 - sdc[0].em )
float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh)
float pc3, //sdc[0].d/(tau_t * sdc[0].c);
//sdpc = sd precalculated per atom values
//sdpc.x = 1/sqrt(m)*sig[0].Yv
//sdpc.y = 1/sqrt(m)*sig[0].V
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd2X<>, //This is calculated in sd2 of the previous step
float4 posq<>,
float3 f<>,
float3 v<>,
float invmass<>,
out float3 sd1V<>, //This is V in gromacs, reused in sd2
out float3 vnew<>,
out float4 posqp<>
){
float3 Vmh;
float3 fg1, fg2;
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
//2 random vectors used in each fragment
linind_gauss = 2 * linind_gauss + goffset;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
Vmh = sd2X * pc3 + sdpc.x * fg1;
sd1V = sdpc.y * fg2;
vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem;
posqp.w = posq.w;
posqp.xyz = posq.xyz + vnew * pc2;
} */
/*Second part of sd update, after first constraints call*/
/*
kernel void kupdate_sd2(
float xstrwidth, //stream width of atoms
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
float pc1, //this is 1/pc2 for sd1
//= 1/(tau_t*(sdc[0].eph-sdc[0].emh))
float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1)
//per atom precalcs
//sdpc.x = 1/sqrt(m)*sig[0].Yx
//sdpc.y = 1/sqrt(m)*sig[0].X
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd1V<>, //calculated in sd1
float4 posq<>, //positions pre-update
float4 posqp<>, //positions post-constraint
float3 vnew<>, //velocities after sd1
out float3 sd2X<>, //Used in sd1
out float3 v<>, //velocities corrected for constraints
out float4 posqp2<> //Will need to be constrained again
){
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
float3 fg1, fg2;
float3 Xmh;
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
linind_gauss = 2 * linind_gauss + goffset;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
v = pc1 * ( posqp.xyz - posq.xyz );
Xmh = sd1V * pc2 + sdpc.x * fg1;
sd2X = sdpc.y * fg2;
posqp2 = posqp;
posqp2.xyz += sd2X - Xmh;
} */
//Applies a permutation to the given random vector stream
//Totally bandwidth bound and streams are large, should be done infrequently.
//Probably don't want gvin and gvout to be the same stream!
kernel void kpermute_vectors( float gstrwidth, float perm<>,
float3 gvin[][], out float3 gvout<> )
{
float2 ind;
//ind.y = floor( perm / gstrwidth );
ind.y = round( (perm - fmod( perm, gstrwidth ))/gstrwidth );
ind.x = perm - ind.y * gstrwidth;
gvout = gvin[ ind ];
}
/* /*
* Alternative formulation to handle precision better * Alternative formulation to handle precision better
* In updatesd1, we output only deltaV's. The shake * In updatesd1, we output only deltaV's. The shake
...@@ -220,85 +100,39 @@ kernel void kupdate_sd1_fix1( ...@@ -220,85 +100,39 @@ kernel void kupdate_sd1_fix1(
float3 fgauss[][], float3 fgauss[][],
float3 sd2X<>, //This is calculated in sd2 of the previous step float3 sd2X<>, //This is calculated in sd2 of the previous step
float4 posq<>, float3 posq<>,
float3 f<>, float3 f<>,
float3 v<>, float3 v<>,
float invmass<>, float invmass<>,
out float3 sd1V<>, //This is V in gromacs, reused in sd2 out float3 sd1V<>, //This is V in gromacs, reused in sd2
out float3 vnew<>, out float3 vnew<>,
out float4 posqp<> out float3 posqp<>
){ ){
float3 Vmh; float3 Vmh;
float3 fg1, fg2; float3 fg1, fg2;
float linind_gauss; //linear index into the random numbers float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index float2 igauss; //2d index
igauss = indexof( posq ); igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x; linind_gauss = igauss.y * xstrwidth + igauss.x;
//2 random vectors used in each fragment
linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
/*
Gromacs code
n = 1 -> ngtc
sdc[n].gdt = dt/tau_t[n];
sdc[n].eph = exp(sdc[n].gdt/2);
sdc[n].emh = exp(-sdc[n].gdt/2);
sdc[n].em = exp(-sdc[n].gdt);
sdc[n].b = sdc[n].gdt*(sqr(sdc[n].eph)-1) - 4*sqr(sdc[n].eph-1);
// 2.15 in paper (C) //2 random vectors used in each fragment
sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em;
sdc[n].d = 2 - sdc[n].eph - sdc[n].emh;
Vmh = X[n-start][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c) + ism*sig[gt].Yv*fgauss(&jran);
V[n-start][d] = ism*sig[gt].V*fgauss(&jran);
v[n][d] = vn*sdc[gt].em + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em) +
V[n-start][d] - sdc[gt].em*Vmh;
xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh);
*/
// --------------------------------------------------------------------------------------------- linind_gauss = 2 * linind_gauss + goffset;
// float pc3 = sdc[0].d/(tau_t * sdc[0].c); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
//sdpc.x = 1/sqrt(m)*sig[0].Yv igauss.x = linind_gauss - igauss.y * gstrwidth;
Vmh = sd2X * pc3 + sdpc.x * fg1; fg1 = fgauss[ igauss ];
// ---------------------------------------------------------------------------------------------
// --------------------------------------------------------------------------------------------- linind_gauss += 1.0f;
//sdpc.y = 1/sqrt(m)*sig[0].V igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
sd1V = sdpc.y * fg2; igauss.x = linind_gauss - igauss.y * gstrwidth;
// --------------------------------------------------------------------------------------------- fg2 = fgauss[ igauss ];
// --------------------------------------------------------------------------------------------- Vmh = sd2X * pc3 + sdpc.x * fg1;
// sd1V = sdpc.y * fg2;
// float pc1 = tau_t[0] * ( 1 - sdc[0].em ) vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem;
vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem; posqp = vnew * pc2;
// float pc2 = tau_t[gt] * (sdc[gt].eph - sdc.emh)
posqp = posq;
posqp.xyz += vnew * pc2;
} }
kernel void kupdate_sd2_fix1( kernel void kupdate_sd2_fix1(
...@@ -320,207 +154,34 @@ kernel void kupdate_sd2_fix1( ...@@ -320,207 +154,34 @@ kernel void kupdate_sd2_fix1(
float3 fgauss[][], float3 fgauss[][],
float3 sd1V<>, //calculated in sd1 float3 sd1V<>, //calculated in sd1
float4 posq<>, //positions pre-update float3 posq<>, //positions pre-update
float4 posqp<>, //deltas post constraint float3 posqp<>, //deltas post constraint
float3 vnew<>, //velocities after sd1 float3 vnew<>, //velocities after sd1
out float3 sd2X<>, //Used in sd1 out float3 sd2X<>, //Used in sd1
out float3 v<>, //velocities corrected for constraints out float3 v<>, //velocities corrected for constraints
out float4 posqp2<> //new deltas, need to be constrained again out float3 posqp2<> //new deltas, need to be constrained again
){ ){
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
float3 fg1, fg2;
float3 Xmh;
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
//v = pc1 * posqp.xyz (!!!!!!!! ?????? !!!!!!!)
v = pc1 * (posqp.xyz - posq.xyz);
Xmh = sd1V * pc2 + sdpc.x * fg1;
sd2X = sdpc.y * fg2;
posqp2 = posqp;
posqp2.xyz += sd2X - Xmh;
}
kernel void kupdate_sd1_fix1_FixedRV(
float xstrwidth, //atom stream width
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
//sd constants, prefixed with c
//to avoid 1 letter names, cxx = sdc[0].xx
float cem,
//sd precalculated constants
float pc1, //tau_t[0] * ( 1 - sdc[0].em )
float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh)
float pc3, //sdc[0].d/(tau_t * sdc[0].c);
//sdpc = sd precalculated per atom values
//sdpc.x = 1/sqrt(m)*sig[0].Yv
//sdpc.y = 1/sqrt(m)*sig[0].V
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd2X<>, //This is calculated in sd2 of the previous step
float4 posq<>,
float3 f<>,
float3 v<>,
float invmass<>,
out float3 sd1V<>, //This is V in gromacs, reused in sd2
out float3 vnew<>,
out float4 posqp<>
){
float3 Vmh;
float3 fg1, fg2;
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
float RandomValueVsp = 0.1f;
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
//2 random vectors used in each fragment
linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
//fg1 = fgauss[ igauss ];
fg1 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
//fg2 = fgauss[ igauss ];
fg2 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
/*
Gromacs code
n = 1 -> ngtc
sdc[n].gdt = dt/tau_t[n];
sdc[n].eph = exp(sdc[n].gdt/2);
sdc[n].emh = exp(-sdc[n].gdt/2);
sdc[n].em = exp(-sdc[n].gdt);
sdc[n].b = sdc[n].gdt*(sqr(sdc[n].eph)-1) - 4*sqr(sdc[n].eph-1);
// 2.15 in paper (C)
sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em;
sdc[n].d = 2 - sdc[n].eph - sdc[n].emh;
Vmh = X[n-start][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c) + ism*sig[gt].Yv*fgauss(&jran);
V[n-start][d] = ism*sig[gt].V*fgauss(&jran);
v[n][d] = vn*sdc[gt].em + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em) +
V[n-start][d] - sdc[gt].em*Vmh;
xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh);
*/
// ---------------------------------------------------------------------------------------------
// float pc3 = sdc[0].d/(tau_t * sdc[0].c);
//sdpc.x = 1/sqrt(m)*sig[0].Yv
Vmh = sd2X * pc3 + sdpc.x * fg1;
// ---------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------
//sdpc.y = 1/sqrt(m)*sig[0].V
sd1V = sdpc.y * fg2;
// ---------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------
//
// float pc1 = tau_t[0] * ( 1 - sdc[0].em )
vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem;
// float pc2 = tau_t[gt] * (sdc[gt].eph - sdc.emh)
posqp = posq;
posqp.xyz += vnew * pc2;
}
kernel void kupdate_sd2_fix1_FixedRV(
float xstrwidth, //stream width of atoms
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
float pc1, //this is 1/pc2 for sd1
//= 1/(tau_t*(sdc[0].eph-sdc[0].emh))
float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1)
//per atom precalcs
//sdpc.x = 1/sqrt(m)*sig[0].Yx
//sdpc.y = 1/sqrt(m)*sig[0].X
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd1V<>, //calculated in sd1
float4 posq<>, //positions pre-update
float4 posqp<>, //deltas post constraint
float3 vnew<>, //velocities after sd1
out float3 sd2X<>, //Used in sd1
out float3 v<>, //velocities corrected for constraints
out float4 posqp2<> //new deltas, need to be constrained again
){
float linind_gauss; //linear index into the random numbers float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index float2 igauss; //2d index
float3 fg1, fg2; float3 fg1, fg2;
float3 Xmh; float3 Xmh;
float RandomValueVsp = 0.1f;
igauss = indexof( posq ); igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x; linind_gauss = igauss.y * xstrwidth + igauss.x;
linind_gauss = 2 * linind_gauss + goffset; linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth;
igauss.x = linind_gauss - igauss.y * gstrwidth; fg1 = fgauss[ igauss ];
//fg1 = fgauss[ igauss ];
fg1 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
linind_gauss += 1.0f; linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth ); igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth); igauss.x = linind_gauss - igauss.y * gstrwidth;
igauss.x = linind_gauss - igauss.y * gstrwidth; fg2 = fgauss[ igauss ];
// fg2 = fgauss[ igauss ];
fg2 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
//v = pc1 * posqp.xyz (!!!!!!!! ?????? !!!!!!!) v = pc1 * posqp;
v = pc1 * (posqp.xyz - posq.xyz); Xmh = sd1V * pc2 + sdpc.x * fg1;
Xmh = sd1V * pc2 + sdpc.x * fg1; sd2X = sdpc.y * fg2;
posqp2 = posqp + sd2X - Xmh;
sd2X = sdpc.y * fg2;
posqp2 = posqp;
posqp2.xyz += sd2X - Xmh;
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment