Commit 116a99ba authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Mods

parent ca43baf0
......@@ -286,11 +286,11 @@ int BrookBonded::isParameterStreamSet( int index ) const {
*
*/
int BrookBonded::validateInverseMapStreamCount( int index, int count ) const {
int BrookBonded::_validateInverseMapStreamCount( int index, int count ) const {
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::validateInverseMapStreamCount";
static const std::string methodName = "BrookBonded::_validateInverseMapStreamCount";
// ---------------------------------------------------------------------------------------
......@@ -450,11 +450,11 @@ BrookFloatStreamInternal** BrookBonded::getInverseStreamMapsStreams( int index )
/*Flips i,j,k,l to l,k,j,i while correctly shuffling the params */
void BrookBonded::flipQuartet( int ibonded, int *particles ){
void BrookBonded::_flipQuartet( int ibonded, int *particles ){
// ---------------------------------------------------------------------------------------
// static const std::string methodName = "BrookBonded::flipQuartet";
// static const std::string methodName = "BrookBonded::_flipQuartet";
// ---------------------------------------------------------------------------------------
......@@ -473,11 +473,11 @@ void BrookBonded::flipQuartet( int ibonded, int *particles ){
}
int BrookBonded::matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles ){
int BrookBonded::_matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles ){
// ---------------------------------------------------------------------------------------
// static const std::string methodName = "BrookBonded::matchTorsion";
// static const std::string methodName = "BrookBonded::_matchTorsion";
// ---------------------------------------------------------------------------------------
......@@ -516,13 +516,13 @@ int BrookBonded::matchTorsion( int i, int j, int k, int l, int nbondeds, int *pa
*
**/
int BrookBonded::matchAngle( int i, int j, int k, int nbondeds,
int BrookBonded::_matchAngle( int i, int j, int k, int nbondeds,
int *particles, int *flag ){
// ---------------------------------------------------------------------------------------
int n;
static const std::string methodName = "BrookBonded::matchAngle";
static const std::string methodName = "BrookBonded::_matchAngle";
// ---------------------------------------------------------------------------------------
......@@ -572,11 +572,11 @@ int BrookBonded::matchAngle( int i, int j, int k, int nbondeds,
*
* */
int BrookBonded::matchBond( int i, int j, int nbondeds, int *particles, int *flag ){
int BrookBonded::_matchBond( int i, int j, int nbondeds, int *particles, int *flag ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::matchBond";
static const std::string methodName = "BrookBonded::_matchBond";
// ---------------------------------------------------------------------------------------
......@@ -638,11 +638,11 @@ int BrookBonded::matchBond( int i, int j, int nbondeds, int *particles, int *fla
return ErrorReturnValue;
}
int BrookBonded::matchPair( int i, int j, int nbondeds, int *particles ){
int BrookBonded::_matchPair( int i, int j, int nbondeds, int *particles ){
// ---------------------------------------------------------------------------------------
//static const std::string methodName = "BrookBonded::matchPair";
//static const std::string methodName = "BrookBonded::_matchPair";
// ---------------------------------------------------------------------------------------
......@@ -699,13 +699,13 @@ int BrookBonded::matchPair( int i, int j, int nbondeds, int *particles ){
*
*/
int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[],
int BrookBonded::_addRBTorsions( int *nbondeds, int *particles, float *params[],
const vector<vector<int> >& rbTorsionIndices,
const vector<vector<double> >& rbTorsionParameters ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::BrookBonded";
static const std::string methodName = "BrookBonded::_addRBTorsions";
static const int debug = 0;
// ---------------------------------------------------------------------------------------
......@@ -725,7 +725,7 @@ int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[],
int k = particlesIndices[index++];
int l = particlesIndices[index++];
int ibonded = matchTorsion( i, j, k, l, *nbondeds, particles );
int ibonded = _matchTorsion( i, j, k, l, *nbondeds, particles );
if( ibonded < 0 ){
ibonded = *nbondeds;
......@@ -767,13 +767,13 @@ int BrookBonded::addRBTorsions( int *nbondeds, int *particles, float *params[],
*
*/
int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[],
int BrookBonded::_addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[],
const vector<vector<int> >& periodicTorsionIndices,
const vector<vector<double> >& periodicTorsionParameters ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addPTorsion";
static const std::string methodName = "BrookBonded::_addPTorsion";
static const int debug = 0;
// ---------------------------------------------------------------------------------------
......@@ -793,7 +793,7 @@ int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat*
int k = particlesIndices[index++];
int l = particlesIndices[index++];
int ibonded = matchTorsion( i, j, k, l, *nbondeds, particles );
int ibonded = _matchTorsion( i, j, k, l, *nbondeds, particles );
if( ibonded < 0 ){
ibonded = *nbondeds;
......@@ -833,12 +833,12 @@ int BrookBonded::addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat*
*
*/
int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], const std::vector<std::vector<int> >& angleIndices,
int BrookBonded::_addAngles( int *nbondeds, int *particles, float *params[], const std::vector<std::vector<int> >& angleIndices,
const std::vector<std::vector<double> >& angleParameters ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addAngles";
static const std::string methodName = "BrookBonded::_addAngles";
static const int debug = 0;
// ---------------------------------------------------------------------------------------
......@@ -860,7 +860,7 @@ int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], cons
int k = particlesIndices[index++];
int flag;
int ibonded = matchAngle( i, j, k, *nbondeds, particles, &flag );
int ibonded = _matchAngle( i, j, k, *nbondeds, particles, &flag );
if( ibonded < 0 ){
ibonded = *nbondeds;
ATOMS( ibonded, 0 ) = i;
......@@ -895,12 +895,12 @@ int BrookBonded::addAngles( int *nbondeds, int *particles, float *params[], cons
*
*/
int BrookBonded::addBonds( int *nbondeds, int *particles, float *params[], const vector<vector<int> >& bondIndices,
int BrookBonded::_addBonds( int *nbondeds, int *particles, float *params[], const vector<vector<int> >& bondIndices,
const vector<vector<double> >& bondParameters ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addBonds";
static const std::string methodName = "BrookBonded::_addBonds";
static const int debug = 0;
// ---------------------------------------------------------------------------------------
......@@ -929,7 +929,7 @@ int BrookBonded::addBonds( int *nbondeds, int *particles, float *params[], const
}
int flag;
int ibonded = matchBond( i, j, *nbondeds, particles, &flag );
int ibonded = _matchBond( i, j, *nbondeds, particles, &flag );
int saveIbond = ibonded;
if( ibonded < 0 ){
ibonded = *nbondeds;
......@@ -972,14 +972,14 @@ int saveIbond = ibonded;
*
*/
int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[],
int BrookBonded::_addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[],
BrookOpenMMFloat* charges,
const std::vector<std::vector<int> >& bonded14Indices,
const std::vector<std::vector<double> >& nonbondedParameters ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::addPairs";
static const std::string methodName = "BrookBonded::_addPairs";
static const double oneSixth = 1.0/6.0;
static const int debug = 1;
......@@ -994,7 +994,7 @@ int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* para
int i = bonded14Indices[ii][0];
int j = bonded14Indices[ii][1];
int ibonded = matchPair( i, j, *nbondeds, particles );
int ibonded = _matchPair( i, j, *nbondeds, particles );
if( ibonded < 0 ){
ibonded = *nbondeds;
ATOMS(ibonded, 0) = i;
......@@ -1048,11 +1048,11 @@ int BrookBonded::addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* para
*
*/
int BrookBonded::loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize ){
int BrookBonded::_loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookBonded::loadInvMaps";
static const std::string methodName = "BrookBonded::_loadInvMaps";
static const int PrintOn = 0;
double dangleValue = 0.0;
......@@ -1113,9 +1113,9 @@ int BrookBonded::loadInvMaps( int nbondeds, int nparticles, int *particles, int
for( int jj = 0; jj < 4*getMaxInverseMapStreamCount()*particleStreamSize; jj++ ){
block[jj] = -1.0f;
}
gpuCalcInvMap( ii, 4, nbondeds, nparticles, particles, getInverseMapStreamCount( ii ), counts, invmaps, &(_inverseMapStreamCount[ii]) );
_gpuCalcInvMap( ii, 4, nbondeds, nparticles, particles, getInverseMapStreamCount( ii ), counts, invmaps, &(_inverseMapStreamCount[ii]) );
//gpuPrintInvMaps( _inverseMapStreamCount[ii], nparticles, counts, invmaps, getLog() );
validateInverseMapStreamCount( ii, _inverseMapStreamCount[ii] );
_validateInverseMapStreamCount( ii, _inverseMapStreamCount[ii] );
for( int jj = 0; jj < _inverseMapStreamCount[ii]; jj++ ){
_inverseStreamMaps[ii][jj]->loadFromArray( invmaps[jj] );
......@@ -1263,19 +1263,19 @@ int BrookBonded::setup( int numberOfParticles,
int nbondeds = 0;
if( rbTorsionBrookBondParameters ){
addRBTorsions( &nbondeds, particles, params, rbTorsionBrookBondParameters->getParticleIndices(), rbTorsionBrookBondParameters->getBondParameters() );
_addRBTorsions( &nbondeds, particles, params, rbTorsionBrookBondParameters->getParticleIndices(), rbTorsionBrookBondParameters->getBondParameters() );
}
if( periodicTorsionBrookBondParameters ){
addPTorsions( &nbondeds, particles, params, periodicTorsionBrookBondParameters->getParticleIndices(), periodicTorsionBrookBondParameters->getBondParameters() );
_addPTorsions( &nbondeds, particles, params, periodicTorsionBrookBondParameters->getParticleIndices(), periodicTorsionBrookBondParameters->getBondParameters() );
}
if( harmonicAngleBrookBondParameters ){
addAngles( &nbondeds, particles, params, harmonicAngleBrookBondParameters->getParticleIndices(), harmonicAngleBrookBondParameters->getBondParameters() );
_addAngles( &nbondeds, particles, params, harmonicAngleBrookBondParameters->getParticleIndices(), harmonicAngleBrookBondParameters->getBondParameters() );
}
if( harmonicBondBrookBondParameters ){
addBonds( &nbondeds, particles, params, harmonicBondBrookBondParameters->getParticleIndices(), harmonicBondBrookBondParameters->getBondParameters() );
_addBonds( &nbondeds, particles, params, harmonicBondBrookBondParameters->getParticleIndices(), harmonicBondBrookBondParameters->getBondParameters() );
}
if( nonBonded14ForceParameters ){
addPairs( &nbondeds, particles, params, charges, nonBonded14ForceParameters->getParticleIndices(), nonBonded14ForceParameters->getBondParameters() );
_addPairs( &nbondeds, particles, params, charges, nonBonded14ForceParameters->getParticleIndices(), nonBonded14ForceParameters->getBondParameters() );
}
// ---------------------------------------------------------------------------------------
......@@ -1387,7 +1387,7 @@ int BrookBonded::setup( int numberOfParticles,
// load inverse maps to streams
loadInvMaps( nbondeds, getNumberOfParticles(), particles, particleStreamWidth, particleStreamSize );
_loadInvMaps( nbondeds, getNumberOfParticles(), particles, particleStreamWidth, particleStreamSize );
// ---------------------------------------------------------------------------------------
......@@ -1531,7 +1531,7 @@ std::string BrookBonded::getContentsString( int level ) const {
*
**/
int BrookBonded::gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles,
int BrookBonded::_gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles,
int *particles, int nmaps, int counts[], float4 *invmaps[],
int *nimaps ){
......@@ -1541,7 +1541,7 @@ int BrookBonded::gpuCalcInvMap( int posflag, int niparticles, int nints, int npa
int particle;
int mapnum, mapcomp;
static const std::string methodName = "BrookBonded::gpuCalcInvMap";
static const std::string methodName = "BrookBonded::_gpuCalcInvMap";
static const unsigned int MAX_LINE_CHARS = 256;
//char value[MAX_LINE_CHARS];
......@@ -1654,7 +1654,7 @@ if( PrintOn && getLog() ){
}
void BrookBonded::gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile ){
void BrookBonded::_gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile ){
int i;
int j;
for( i = 0; i < nparticles; i++ ){
......@@ -1676,7 +1676,7 @@ void BrookBonded::gpuPrintInvMaps( int nmaps, int nparticles, int counts[], floa
* which should always be true
* */
int BrookBonded::gpuCalcInvMap_merged(
int BrookBonded::_gpuCalcInvMap_merged(
int nints, //number of interactions
int nparticles, //number of particles
int *particles, //ijkl,ijkl,ijkl...
......@@ -1754,7 +1754,7 @@ int BrookBonded::gpuCalcInvMap_merged(
*
* buf should be nimaps * nparticles large.
* */
int BrookBonded::gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts,
int BrookBonded::_gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts,
float4 *invmaps[], float4 *buf ){
int i, j;
int nmaps_i;
......
......@@ -35,7 +35,6 @@
#include <vector>
#include "BrookStreamImpl.h"
//#include "BrookPlatform.h"
#include "BrookCommon.h"
#include "OpenMMContext.h"
#include "BrookBondParameters.h"
......@@ -43,7 +42,7 @@
namespace OpenMM {
/**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
* Calculate Brook bonded forces
*/
class BrookBonded : public BrookCommon {
......@@ -68,33 +67,19 @@ class BrookBonded : public BrookCommon {
/**
* Initialize the kernel, setting up the values of all the force field parameters.
*
* @param bondIndices the two particles connected by each bond term
* @param bondParameters the force parameters (length, k) for each bond term
* @param angleIndices the three particles connected by each angle term
* @param angleParameters the force parameters (angle, k) for each angle term
* @param periodicTorsionIndices the four particles connected by each periodic torsion term
* @param periodicTorsionParameters the force parameters (k, phase, periodicity) for each periodic torsion term
* @param rbTorsionIndices the four particles connected by each Ryckaert-Bellemans torsion term
* @param rbTorsionParameters the coefficients (in order of increasing powers) for each Ryckaert-Bellemans torsion term
* @param bonded14Indices each element contains the indices of two particles whose nonbonded interactions should be reduced since
* they form a bonded 1-4 pair
* @param nonbondedParameters the nonbonded force parameters (charge, sigma, epsilon) for each particle
* @param log log reference
* @param numberOfParticles numberOfParticles
* @param harmonicBondBrookBondParameters force parameters (length, k) for each bond term
* @param harmonicAngleBrookBondParameters force parameters (angle, k) for each angle term
* @param periodicTorsionBrookBondParameters force parameters (k, phase, periodicity) for each periodic torsion term
* @param rbTorsionBrookBondParameters coefficients (in order of increasing powers) for each Ryckaert-Bellemans torsion term
* @param nonBonded14ForceParameters nonbonded 14 force parameters (charge, sigma, epsilon) for each particle
* @param particleStreamWidth stream width
* @param particleStreamSize stream size
*
* @return nonzero value if error
*
*/
/*
int setup( int numberOfParticles,
const std::vector<std::vector<int> >& bondIndices, const std::vector<std::vector<double> >& bondParameters,
const std::vector<std::vector<int> >& angleIndices, const std::vector<std::vector<double> >& angleParameters,
const std::vector<std::vector<int> >& periodicTorsionIndices, const std::vector<std::vector<double> >& periodicTorsionParameters,
const std::vector<std::vector<int> >& rbTorsionIndices, const std::vector<std::vector<double> >& rbTorsionParameters,
const std::vector<std::vector<int> >& bonded14Indices, const std::vector<std::vector<double> >& nonbondedParameters,
const Platform& platform );
*/
int setup( int numberOfParticles,
BrookBondParameters* harmonicBondBrookBondParameters,
BrookBondParameters* harmonicAngleBrookBondParameters,
......@@ -316,11 +301,12 @@ class BrookBonded : public BrookCommon {
// helper methods in setup of parameters
void flipQuartet( int ibonded, int *particles );
int matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles );
int matchAngle( int i, int j, int k, int nbondeds, int *particles, int *flag );
int matchBond( int i, int j, int nbondeds, int *particles, int *flag );
int matchPair( int i, int j, int nbondeds, int *particles );
void _flipQuartet( int ibonded, int *particles );
int _matchTorsion( int i, int j, int k, int l, int nbondeds, int *particles );
int _matchAngle( int i, int j, int k, int nbondeds, int *particles, int *flag );
int _matchBond( int i, int j, int nbondeds, int *particles, int *flag );
int _matchPair( int i, int j, int nbondeds, int *particles );
/**
* Setup Ryckaert-Bellemans parameters/particle indices
......@@ -335,7 +321,7 @@ class BrookBonded : public BrookCommon {
*
*/
int addRBTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& rbTorsionIndices,
int _addRBTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& rbTorsionIndices,
const std::vector<std::vector<double> >& rbTorsionParameters );
/**
......@@ -351,7 +337,7 @@ class BrookBonded : public BrookCommon {
*
*/
int addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& periodicTorsionIndices,
int _addPTorsions( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& periodicTorsionIndices,
const std::vector<std::vector<double> >& periodicTorsionParameters );
/**
......@@ -367,7 +353,7 @@ class BrookBonded : public BrookCommon {
*
*/
int addAngles( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& angleIndices,
int _addAngles( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& angleIndices,
const std::vector<std::vector<double> >& angleParameters );
/**
......@@ -383,7 +369,7 @@ class BrookBonded : public BrookCommon {
*
*/
int addBonds( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& bondIndices,
int _addBonds( int *nbondeds, int *particles, BrookOpenMMFloat* params[], const std::vector<std::vector<int> >& bondIndices,
const std::vector<std::vector<double> >& bondParameters );
/**
......@@ -401,7 +387,7 @@ class BrookBonded : public BrookCommon {
*
*/
int addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[], BrookOpenMMFloat* charges,
int _addPairs( int *nbondeds, int *particles, BrookOpenMMFloat* params[], BrookOpenMMFloat* charges,
const std::vector<std::vector<int> >& bonded14Indices, const std::vector<std::vector<double> >& nonbondedParameters );
/**
......@@ -418,7 +404,7 @@ class BrookBonded : public BrookCommon {
*/
//int loadInvMaps( int nbondeds, int nparticles, int *particles, const BrookPlatform& platform );
int loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize );
int _loadInvMaps( int nbondeds, int nparticles, int *particles, int particleStreamWidth, int particleStreamSize );
/**
* Validate inverse map count
......@@ -432,7 +418,7 @@ class BrookBonded : public BrookCommon {
*
*/
int validateInverseMapStreamCount( int index, int count ) const;
int _validateInverseMapStreamCount( int index, int count ) const;
/*
* Helper functions for building inverse maps for
......@@ -462,28 +448,31 @@ class BrookBonded : public BrookCommon {
*
**/
int gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles,
int *particles, int nmaps, int counts[], float4 *invmaps[],
int *nimaps );
int _gpuCalcInvMap( int posflag, int niparticles, int nints, int nparticles,
int *particles, int nmaps, int counts[], float4 *invmaps[], int *nimaps );
void gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile );
void _gpuPrintInvMaps( int nmaps, int nparticles, int counts[], float4 *invmap[], FILE* logFile );
/* We are still plagued by kernel call overheads. This is for a big fat
/*
* We are still plagued by kernel call overheads. This is for a big fat
* merged inverse gather kernel:
* Since we have 32 bit floats, we have 23 bits of mantissa or the largest
* integer we can represent is 2^23. So it should be quite safe to add
* 100000 * n to the index where n is the stream in which we should do the
* lookup. This assumes that nints < 100000, preferably nints << 100000
* which should always be true
* */
int gpuCalcInvMap_merged( int nints, int nparticles, int *particles, int nmaps, int counts[], float4 *invmaps[], int *nimaps );
*
**/
int _gpuCalcInvMap_merged( int nints, int nparticles, int *particles, int nmaps, int counts[], float4 *invmaps[], int *nimaps );
/* Repacks the invmap streams for more efficient access in the
/*
* Repacks the invmap streams for more efficient access in the
* merged inverse gather kernel
*
* buf should be nimaps * nparticles large.
* */
int gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts, float4 *invmaps[], float4 *buf );
*
**/
int _gpuRepackInvMap_merged( int nparticles, int nmaps, int *counts, float4 *invmaps[], float4 *buf );
};
......
......@@ -141,20 +141,19 @@ void BrookCalcGBSAOBCForceKernel::initialize( const System& system, const GBSAOB
// and initialize brookGbsa
_numberOfParticles = system.getNumParticles();
std::vector<std::vector<double> > particleParameters;
std::vector<std::vector<double> > particleParameters( _numberOfParticles );
for( int ii = 0; ii < _numberOfParticles; ii++ ){
double charge, radius, scalingFactor;
force.getParticleParameters( ii, charge, radius, scalingFactor );
std::vector<double> parameters;
particleParameters[ii] = parameters;
parameters[0] = charge;
parameters[1] = radius;
parameters[2] = scalingFactor;
particleParameters[ii].push_back( charge );
particleParameters[ii].push_back( radius );
particleParameters[ii].push_back( scalingFactor );
}
brookGbsa.setup( particleParameters, force.getSolventDielectric(), force.getSoluteDielectric(), getPlatform() );
brookGbsa.setIsActive( 1 );
_openMMBrookInterface.setTriggerForceKernel( this );
_openMMBrookInterface.setTriggerEnergyKernel( this );
......
......@@ -107,12 +107,14 @@ void BrookCalcKineticEnergyKernel::initialize( const System& system ){
_masses[ii] = static_cast<BrookOpenMMFloat>(system.getParticleMass(ii));
}
/*
std::vector<double> masses;
for( unsigned int ii = 0; ii < (unsigned int) _numberOfParticles; ii++ ){
masses.push_back( system.getParticleMass(ii) );
}
_brookVelocityCenterOfMassRemoval = new BrookVelocityCenterOfMassRemoval();
_brookVelocityCenterOfMassRemoval->setup( masses, getPlatform() );
*/
return;
}
......@@ -166,10 +168,11 @@ printf( "NewCom [%12.5e %12.5e %12.5e]\n", newcom[0], newcom[1], newcom[2] );
index = 0;
*/
for( int ii = 0; ii < _numberOfParticles; ii++, index += 3 ){
energy += _masses[ii]*(velocity[index]*velocity[index] + velocity[index + 1]*velocity[index + 1] + velocity[index + 2]*velocity[index + 2]);
}
//printf( " KQ=%12.5e\n", 0.5*energy );
printf( " KQ=%12.5e\n", 0.5*energy );
return 0.5*energy;
}
......@@ -904,56 +904,6 @@ std::string BrookGbsa::getContentsString( int level ) const {
return message.str();
}
/*
* Calculate OBC energy
*
* @param particlePositions particle positions
* @return energy
*
* @throw OpenMMException if _cpuObc or charges are not set
*
* */
double BrookGbsa::getEnergy( const Stream& particlePositions ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookGbsa::getEnergy";
// ---------------------------------------------------------------------------------------
// validate initialization
if( _cpuObc == NULL ){
std::stringstream message;
message << methodName << " _cpuObc not set.";
throw OpenMMException( message.str() );
return ErrorReturnValue;
}
if( _charges == NULL ){
std::stringstream message;
message << methodName << " charges not set.";
throw OpenMMException( message.str() );
return ErrorReturnValue;
}
const BrookStreamImpl& positionStreamC = dynamic_cast<const BrookStreamImpl&> (particlePositions.getImpl());
BrookStreamImpl& positionStream = const_cast<BrookStreamImpl&> (positionStreamC);
BrookOpenMMFloat* positionsF = (BrookOpenMMFloat*) positionStream.getData();
RealOpenMM** positions = copy1DArrayTo2DArray( positionStream.getSize(), 3, positionsF );
RealOpenMM** forces = allocateRealArray( positionStream.getSize(), 3 );
_cpuObc->computeImplicitSolventForces( positions, _charges, forces, 1 );
disposeRealArray( forces );
disposeRealArray( positions );
return _cpuObc->getEnergy();
}
/**
* Compute forces
*
......@@ -964,7 +914,7 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl&
// ---------------------------------------------------------------------------------------
static const std::string methodName = "BrookGbsa::executeForces";
static const int PrintOn = 0;
static const int PrintOn = 1;
float mergeNonObcForces = 1.0f;
float kcalMolTokJNM = -0.4184f;
......@@ -983,13 +933,36 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl&
(float) getParticleStreamWidth( ),
(float) getPartialForceStreamWidth( ),
positionStream.getBrookStream(),
getObcParticleRadii()->getBrookStream(),
getObcScaledParticleRadii()->getBrookStream(),
// NOTE: obcParticleRadiiWithDielectricOffset & obcScaledParticleRadii may be wrong!! or br files need editing
// gpuObc->getBrookStreamWrapperAtIndex( GpuObc::obcParticleRadiiWithDielectricOffset )->getStream(),
// gpuObc->getBrookStreamWrapperAtIndex( GpuObc::obcScaledParticleRadii )->getStream(),
gbsaForceStreams[0]->getBrookStream() );
// ---------------------------------------------------------------------------------------
// diagnostics
if( 0 && PrintOn && getLog() ){
(void) fprintf( getLog(), "\n%s Post kCalculateBornRadii: atms=%d ceil=%d dup=%d particleStrW=%3d prtlF=%3d diel=%.3f %.3f ACE=%.1f\n",
methodName.c_str(), getNumberOfParticles(),
getParticleSizeCeiling(),
getDuplicationFactor(),
getParticleStreamWidth( ),
getPartialForceStreamWidth( ) );
BrookStreamInternal* brookStreamInternalF = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalF->printToFile( getLog() );
(void) fprintf( getLog(), "\nRadii\n" );
getObcParticleRadii()->printToFile( getLog() );
(void) fprintf( getLog(), "\nObcScaledParticleRadii\n" );
getObcScaledParticleRadii()->printToFile( getLog() );
}
// ---------------------------------------------------------------------------------------
kPostCalculateBornRadii_nobranch(
(float) getDuplicationFactor(),
(float) getParticleStreamWidth( ),
......@@ -1004,6 +977,37 @@ void BrookGbsa::computeForces( BrookStreamImpl& positionStream, BrookStreamImpl&
getObcBornRadii()->getBrookStream(),
getObcChain()->getBrookStream() );
// ---------------------------------------------------------------------------------------
// diagnostics
if( 0 && PrintOn && getLog() ){
(void) fprintf( getLog(), "\n%s Post kPostCalculateBornRadii_nobranch: atms=%d ceil=%d dup=%d particleStrW=%3d prtlF=%3d diel=%.3f %.3f ACE=%.1f\n",
methodName.c_str(), getNumberOfParticles(),
getParticleSizeCeiling(),
getDuplicationFactor(),
getParticleStreamWidth( ),
getPartialForceStreamWidth( ) );
BrookStreamInternal* brookStreamInternalF = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalF->printToFile( getLog() );
(void) fprintf( getLog(), "\nInput\n" );
gbsaForceStreams[0]->printToFile( getLog() );
(void) fprintf( getLog(), "\nObcParticleRadii\n" );
getObcParticleRadii()->printToFile( getLog() );
(void) fprintf( getLog(), "\nBornR\n" );
getObcBornRadii()->printToFile( getLog() );
(void) fprintf( getLog(), "\nObcChain\n" );
getObcChain()->printToFile( getLog() );
}
// ---------------------------------------------------------------------------------------
// seecond major loop
......
......@@ -163,12 +163,12 @@ void BrookIntegrateVerletStepKernel::initialize( const System& system, const Ve
_brookVerletDynamics->setLog( log );
_brookShakeAlgorithm = new BrookShakeAlgorithm( );
_brookShakeAlgorithm->setLog( log );
_brookShakeAlgorithm->setup( masses, constraintIndicesVector, constraintLengths, getPlatform() );
BrookOpenMMFloat tolerance = static_cast<BrookOpenMMFloat>( integrator.getConstraintTolerance() );
_brookShakeAlgorithm->setShakeTolerance( tolerance );
_brookShakeAlgorithm->setMaxIterations( 30 );
_brookShakeAlgorithm->setLog( log );
if( printOn && log ){
(void) fprintf( log, "%s done w/ setup: particles=%d const=%d\n", methodName.c_str(), numberOfParticles, numberOfConstraints );
......
......@@ -370,7 +370,6 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// ---------------------------------------------------------------------------------------
static const char* methodName = "\nBrookLangevinDynamics::update";
static const int PrintOn = 0;
// ---------------------------------------------------------------------------------------
......@@ -427,8 +426,8 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// diagnostics
if( PrintOn ){
(void) fprintf( getLog(), "\nPost kupdate_sd1_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"EM=%12.5e Sd1pc[]=[%12.5e %12.5e %12.5e]",
(void) fprintf( getLog(), "\n%s Post kupdate_sd1_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"EM=%12.5e Sd1pc[]=[%12.5e %12.5e %12.5e]", methodName,
getLangevinDynamicsParticleStreamWidth(),
brookRandomNumberGenerator.getRandomNumberStreamWidth(),
brookRandomNumberGenerator.getRvStreamOffset(),
......@@ -477,10 +476,10 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// first Shake step
if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){
kshakeh_fix1(
(float) brookShakeAlgorithm.getMaxIterations(),
(float) getLangevinDynamicsParticleStreamWidth(),
brookShakeAlgorithm.getInverseHydrogenMass(),
brookShakeAlgorithm.getShakeTolerance(),
brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(),
positionStream.getBrookStream(),
......@@ -503,6 +502,37 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
brookShakeAlgorithm.getShakeXCons2Stream()->getBrookStream(),
brookShakeAlgorithm.getShakeXCons3Stream()->getBrookStream(),
getXPrimeStream()->getBrookStream() );
if( ( 0 || PrintOn) && getLog() ){
(void) fprintf( getLog(), "\n%s Post kshakeh_update2_fix1: particleStrW=%3d",
methodName, getLangevinDynamicsParticleStreamWidth() );
(void) fprintf( getLog(), "\nShakeInverseMapStream\n" );
brookShakeAlgorithm.getShakeInverseMapStream()->printToFile( getLog() );
BrookStreamInternal* brookStreamInternalPos = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalPos->printToFile( getLog() );
(void) fprintf( getLog(), "\nXPrimeStream\n" );
getXPrimeStream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons0\n" );
brookShakeAlgorithm.getShakeXCons0Stream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons1\n" );
brookShakeAlgorithm.getShakeXCons1Stream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons2\n" );
brookShakeAlgorithm.getShakeXCons2Stream()->printToFile( getLog() );
(void) fprintf( getLog(), "\nShakeXCons3\n" );
brookShakeAlgorithm.getShakeXCons3Stream()->printToFile( getLog() );
}
}
// second integration step
......@@ -527,8 +557,8 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// diagnostics
if( PrintOn ){
(void) fprintf( getLog(), "\nPost kupdate_sd2_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"Sd2pc[]=[%12.5e %12.5e]",
(void) fprintf( getLog(), "\n%s Post kupdate_sd2_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"Sd2pc[]=[%12.5e %12.5e]", methodName,
getLangevinDynamicsParticleStreamWidth(),
brookRandomNumberGenerator.getRandomNumberStreamWidth(),
brookRandomNumberGenerator.getRvStreamOffset(),
......@@ -568,10 +598,10 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
// second Shake step
if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){
kshakeh_fix1(
(float) brookShakeAlgorithm.getMaxIterations(),
(float) getLangevinDynamicsParticleStreamWidth(),
brookShakeAlgorithm.getInverseHydrogenMass(),
brookShakeAlgorithm.getShakeTolerance(),
brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(),
positionStream.getBrookStream(),
......@@ -595,6 +625,31 @@ int BrookLangevinDynamics::update( BrookStreamImpl& positionStream, BrookStreamI
brookShakeAlgorithm.getShakeXCons3Stream()->getBrookStream(),
positionStream.getBrookStream() );
// diagnostics
if( PrintOn ){
(void) fprintf( getLog(), "\n%s Post kshakeh_update2_fix1: particleStrW=%3d rngStrW=%3d rngOff=%5d "
"Sd2pc[]=[%12.5e %12.5e]", methodName,
getLangevinDynamicsParticleStreamWidth(),
brookRandomNumberGenerator.getRandomNumberStreamWidth(),
brookRandomNumberGenerator.getRvStreamOffset(),
derivedParameters[Sd2pc1], derivedParameters[Sd2pc2] );
BrookStreamInternal* brookStreamInternalPos = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalPos->printToFile( getLog() );
brookStreamInternalPos = velocityStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nVelocityStream\n" );
brookStreamInternalPos->printToFile( getLog() );
(void) fprintf( getLog(), "\nXPrimeStream\n" );
getXPrimeStream()->printToFile( getLog() );
// brookRandomNumberGenerator.getRandomNumberStream( brookRandomNumberGenerator.getRvStreamIndex() )->printToFile( getLog() );
}
} else {
//kadd3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() );
ksetStr3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() );
......
......@@ -38,6 +38,39 @@
using namespace OpenMM;
using namespace std;
struct ShakeCluster {
int _centralID;
int _peripheralID[3];
int _size;
float _distance;
float _centralInvMass, _peripheralInvMass;
ShakeCluster(){};
ShakeCluster( int centralID, float invMass) : _centralID(centralID), _centralInvMass(invMass), _size(0) {};
void addAtom( int id, float dist, float invMass){
if( _size == 3 ){
std::stringstream message;
message << "ShakeCluster::addAtom: " << "atom " << id << " has more than 3 constraints!." << std::endl;
throw OpenMMException( message.str() );
}
if( _size > 0 && dist != _distance ){
std::stringstream message;
message << "ShakeCluster::addAtom: " << "atom " << id << " has different constraint distances: " << dist << " and " << _distance << std::endl;
throw OpenMMException( message.str() );
}
if( _size > 0 && invMass != _peripheralInvMass ){
std::stringstream message;
message << "ShakeCluster::addAtom: " << " constrainted atoms associated w/ atom " << id << " have different masses: " << invMass << " and " << _peripheralInvMass << std::endl;
throw OpenMMException( message.str() );
}
_peripheralID[_size++] = id;
_distance = dist;
_peripheralInvMass = invMass;
}
};
/**
*
* Constructor
......@@ -74,8 +107,6 @@ BrookShakeAlgorithm::BrookShakeAlgorithm( ){
_shakeStreams[ii] = NULL;
}
_inverseHydrogenMass = one/( (BrookOpenMMFloat) 1.008);
// setup inverse sqrt masses
_inverseSqrtMasses = NULL;
......@@ -164,17 +195,6 @@ int BrookShakeAlgorithm::setShakeTolerance( BrookOpenMMFloat tolerance ){
return DefaultReturnValue;
}
/**
* Get inverse of hydrogen mass
*
* @return inverse of hydrogen mass
*
*/
BrookOpenMMFloat BrookShakeAlgorithm::getInverseHydrogenMass( void ) const {
return _inverseHydrogenMass;
}
/**
* Get Particle stream size
*
......@@ -412,6 +432,7 @@ int BrookShakeAlgorithm::_initializeStreams( const Platform& platform ){
* @param masses masses
* @param constraintIndices constraint particle indices
* @param constraintLengths constraint lengths
* @param platform platform reference
*
* @return ErrorReturnValue if error
*
......@@ -420,7 +441,7 @@ int BrookShakeAlgorithm::_initializeStreams( const Platform& platform ){
*/
int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, const std::vector< std::vector<int> >& constraintIndices,
const std::vector<double>& constraintLengths ){
const std::vector<double>& constraintLengths, const Platform& platform ){
// ---------------------------------------------------------------------------------------
......@@ -431,9 +452,7 @@ int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, co
// ---------------------------------------------------------------------------------------
int shakeParticleStreamSize = getShakeParticleStreamSize();
int shakeConstraintStreamSize = getShakeConstraintStreamSize();
FILE* log = getLog();
// check that number of constraints for two input vectors is consistent
if( constraintIndices.size() != constraintLengths.size() ){
......@@ -443,94 +462,127 @@ int BrookShakeAlgorithm::_setShakeStreams( const std::vector<double>& masses, co
return ErrorReturnValue;
}
// allocate arrays to be read down to board
BrookOpenMMFloat* particleIndices = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
BrookOpenMMFloat* shakeParameters = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
for( int ii = 0; ii < 4*shakeConstraintStreamSize; ii++ ){
particleIndices[ii] = (BrookOpenMMFloat) -1;
}
memset( shakeParameters, 0, 4*shakeConstraintStreamSize*sizeof( BrookOpenMMFloat ) );
// get constraint count for each atom
vector<int> constraintCount( masses.size(), 0);
std::vector< std::vector<int> >::const_iterator particleIterator = constraintIndices.begin();
std::vector<double>::const_iterator distanceIterator = constraintLengths.begin();
int constraintIndex = 0;
_numberOfConstraints = 0;
while( particleIterator != constraintIndices.end() ){
std::vector<int> particleVector = *particleIterator;
int atomI = particleVector[0];
int atomJ = particleVector[1];
// check that array of indices is not too small or large
// check that atom indices are not out of range
if( particleVector.size() < 2 ){
if( atomI < 0 || atomI > (int) masses.size() ){
std::stringstream message;
message << methodName << " particleIndices size=" << particleVector.size() << " is too small at constraintIndex=" << (constraintIndex/4);
message << methodName << " constraint I-index=" << atomI << " is out of range [0, " << masses.size() << "]" << std::endl;
throw OpenMMException( message.str() );
return ErrorReturnValue;
}
if( particleVector.size() > 4 ){
if( atomJ < 0 || atomJ > (int) masses.size() ){
std::stringstream message;
message << methodName << " particleIndices size=" << particleVector.size() << " is too large at constraintIndex=" << (constraintIndex/4);
message << methodName << " constraint J-index=" << atomJ << " is out of range [0, " << masses.size() << "]" << std::endl;
throw OpenMMException( message.str() );
return ErrorReturnValue;
}
int index = 0;
int particleIndex1 = -1;
int particleIndex2 = -1;
for( std::vector<int>::const_iterator ii = particleVector.begin(); ii != particleVector.end(); ii++, index++ ){
particleIndices[constraintIndex + index] = (BrookOpenMMFloat) *ii;
if( index == 0 ){
particleIndex1 = *ii;
} else if( index == 1 ){
particleIndex2 = *ii;
constraintCount[atomI]++;
constraintCount[atomJ]++;
particleIterator++;
}
// Find clusters consisting of a central atom with up to three peripheral atoms.
map<int, ShakeCluster> clusters;
particleIterator = constraintIndices.begin();
std::vector<double>::const_iterator distanceIterator = constraintLengths.begin();
while( particleIterator != constraintIndices.end() ){
float distance = static_cast<float>( *distanceIterator );
std::vector<int> particleVector = *particleIterator;
int atomI = particleVector[0];
int atomJ = particleVector[1];
// Determine the central atom.
bool firstIsCentral;
if( constraintCount[atomI] > 1 ){
firstIsCentral = true;
} else if (constraintCount[atomJ] > 1 ){
firstIsCentral = false;
} else if( atomI < atomJ ){
firstIsCentral = true;
} else {
firstIsCentral = false;
}
int centralID, peripheralID;
float centralInvMass, peripheralInvMass;
if( firstIsCentral ){
centralID = atomI;
peripheralID = atomJ;
centralInvMass = static_cast<float>( masses[atomI] );
peripheralInvMass = static_cast<float>( masses[atomJ] );
} else {
centralID = atomJ;
peripheralID = atomI;
centralInvMass = static_cast<float>( masses[atomJ] );
peripheralInvMass = static_cast<float>( masses[atomI] );
}
if( constraintCount[peripheralID] != 1 ){
throw OpenMMException("Only bonds to hydrogens may be constrained");
}
// skip negative indices
// Add it to the cluster
if( clusters.find(centralID) == clusters.end() ){
clusters[centralID] = ShakeCluster( centralID, centralInvMass );
}
clusters[centralID].addAtom( peripheralID, distance, peripheralInvMass );
if( particleIndex1 < 0 || particleIndex2 < 0 ){
particleIterator++;
distanceIterator++;
continue;
}
if( particleIndex1 >= (int) masses.size() || particleIndex2 >= (int) masses.size() ){
std::stringstream message;
message << methodName << " constraint indices=[" << particleIndex1 << ", " << particleIndex2 << "] greater than mass array size=" << masses.size();
throw OpenMMException( message.str() );
return ErrorReturnValue;
}
// allocate space for streams
// insure heavy particle is first
_initializeStreamSizes( (int) masses.size(), (int) clusters.size(), platform );
_initializeStreams( platform );
if( masses[particleIndex1] < masses[particleIndex2] ){
int shakeParticleStreamSize = getShakeParticleStreamSize();
int shakeConstraintStreamSize = getShakeConstraintStreamSize();
BrookOpenMMFloat swap = particleIndices[constraintIndex];
particleIndices[constraintIndex] = particleIndices[constraintIndex+1];
particleIndices[constraintIndex+1] = swap;
// allocate arrays to be read down to board and initialize
int swapI = particleIndex1;
particleIndex1 = particleIndex2;
particleIndex2 = swapI;
BrookOpenMMFloat* particleIndices = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
BrookOpenMMFloat* shakeParameters = new BrookOpenMMFloat[4*shakeConstraintStreamSize];
BrookOpenMMFloat minusOne = static_cast<BrookOpenMMFloat>(-1.0);
for( int ii = 0; ii < 4*shakeConstraintStreamSize; ii++ ){
particleIndices[ii] = minusOne;
}
memset( shakeParameters, 0, 4*shakeConstraintStreamSize*sizeof( BrookOpenMMFloat ) );
// set parameters:
// load indices & parameters
// (1) 1/(heavy particle mass)
// (2) 0.5/(heavy particle mass+hydrogen mass)
// (3) constraint distance**2
int constraintIndex = 0;
_numberOfConstraints = static_cast<int>(clusters.size());
for( map<int, ShakeCluster>::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter ){
shakeParameters[constraintIndex] = one/( (BrookOpenMMFloat) masses[particleIndex1] );
shakeParameters[constraintIndex+1] = half/( (BrookOpenMMFloat) (masses[particleIndex1] + masses[particleIndex2]) );
shakeParameters[constraintIndex+2] = (BrookOpenMMFloat) ( (*distanceIterator)*(*distanceIterator) );
const ShakeCluster& cluster = iter->second;
particleIndices[constraintIndex] = static_cast<BrookOpenMMFloat>( cluster._centralID );
for( int ii = 0; ii < cluster._size; ii++ ){
particleIndices[constraintIndex+ii+1] = static_cast<BrookOpenMMFloat>( cluster._peripheralID[ii] );
}
shakeParameters[constraintIndex] = one/( static_cast<BrookOpenMMFloat>( cluster._centralInvMass ) );
shakeParameters[constraintIndex+1] = half/( static_cast<BrookOpenMMFloat>(cluster._centralInvMass + cluster._peripheralInvMass) );
shakeParameters[constraintIndex+2] = static_cast<BrookOpenMMFloat>( cluster._distance*cluster._distance );
shakeParameters[constraintIndex+3] = one/( static_cast<BrookOpenMMFloat>( cluster._peripheralInvMass ) );
particleIterator++;
distanceIterator++;
constraintIndex += 4;
_numberOfConstraints++;
}
// write entries to board
......@@ -593,12 +645,7 @@ int BrookShakeAlgorithm::setup( const std::vector<double>& masses, const std::ve
int numberOfParticles = (int) masses.size();
setNumberOfParticles( numberOfParticles );
// set stream sizes and then create streams
_initializeStreamSizes( numberOfParticles, (int) constraintIndices.size(), platform );
_initializeStreams( platform );
_setShakeStreams( masses, constraintIndices, constraintLengths );
_setShakeStreams( masses, constraintIndices, constraintLengths, platform );
return DefaultReturnValue;
}
......@@ -634,6 +681,9 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const {
#define LOCAL_SPRINTF(a,b,c) sprintf( (a), (b), (c) );
#endif
(void) LOCAL_SPRINTF( value, "%d", getNumberOfConstraints() );
message << _getLine( tab, "Number of constraints:", value );
(void) LOCAL_SPRINTF( value, "%d", getNumberOfParticles() );
message << _getLine( tab, "Number of particles:", value );
......@@ -652,9 +702,6 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const {
(void) LOCAL_SPRINTF( value, "%d", getParticleStreamSize() );
message << _getLine( tab, "Particle stream size:", value );
(void) LOCAL_SPRINTF( value, "%d", getNumberOfConstraints() );
message << _getLine( tab, "Number of constraints:", value );
(void) LOCAL_SPRINTF( value, "%d", getShakeConstraintStreamWidth() );
message << _getLine( tab, "Constraint stream width:", value );
......@@ -664,9 +711,6 @@ std::string BrookShakeAlgorithm::getContentsString( int level ) const {
(void) LOCAL_SPRINTF( value, "%d", getShakeConstraintStreamSize() );
message << _getLine( tab, "Constraint stream size:", value );
(void) LOCAL_SPRINTF( value, "%.5f", 1.0f/getInverseHydrogenMass() );
message << _getLine( tab, "H-mass:", value );
message << _getLine( tab, "Log:", (getLog() ? Set : NotSet) );
message << _getLine( tab, "ParticleIndices:", (getShakeParticleIndicesStream() ? Set : NotSet) );
......
......@@ -114,14 +114,6 @@ class BrookShakeAlgorithm : public BrookCommon {
int setShakeTolerance( BrookOpenMMFloat tolerance );
/**
* Get inverse of hydrogen mass
*
* @return inverse of hydrogen mass
*/
BrookOpenMMFloat getInverseHydrogenMass( void ) const;
/**
* Get Shake particle stream width
*
......@@ -291,10 +283,6 @@ class BrookShakeAlgorithm : public BrookCommon {
int _maxIterations;
// inverse of H mass
BrookOpenMMFloat _inverseHydrogenMass;
// particle stream dimensions
int _shakeParticleStreamWidth;
......@@ -361,6 +349,7 @@ class BrookShakeAlgorithm : public BrookCommon {
* @param masses masses
* @param constraintIndices constraint particle indices
* @param constraintLengths constraint lengths
* @param platform platform reference
*
* @return ErrorReturnValue if error
*
......@@ -369,7 +358,7 @@ class BrookShakeAlgorithm : public BrookCommon {
*/
int _setShakeStreams( const std::vector<double>& masses, const std::vector< std::vector<int> >& constraintIndices,
const std::vector<double>& constraintLengths );
const std::vector<double>& constraintLengths, const Platform& platform );
};
......
......@@ -38,10 +38,6 @@
#include "gpu/kupdatemd.h"
#include "gpu/kcommon.h"
// use random number generator
#include "../../reference/src/SimTKUtilities/SimTKOpenMMUtilities.h"
using namespace OpenMM;
using namespace std;
......@@ -162,7 +158,7 @@ int BrookVerletDynamics::updateParameters( double stepSize ){
return DefaultReturnValue;
};
}
/**
* Update
......@@ -182,8 +178,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
// ---------------------------------------------------------------------------------------
static const char* methodName = "\nBrookVerletDynamics::update";
static const int PrintOn = 1;
static std::string methodName = "\nBrookVerletDynamics::update";
static const int PrintOn = 0;
// ---------------------------------------------------------------------------------------
......@@ -193,9 +189,6 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
static int showAux = 1;
(void) fprintf( getLog(), "%s shake=%d\n", methodName, brookShakeAlgorithm.getNumberOfConstraints() );
(void) fflush( getLog() );
if( showAux ){
showAux = 0;
......@@ -203,15 +196,13 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
std::string contents = _brookVelocityCenterOfMassRemoval->getContentsString( );
(void) fprintf( getLog(), "%s VelocityCenterOfMassRemoval contents\n%s", methodName, contents.c_str() );
*/
std::string contents = brookShakeAlgorithm.getContentsString( );
(void) fprintf( getLog(), "%s Shake contents\n%s", methodName, contents.c_str() );
(void) fprintf( getLog(), "%s Shake contents\n%s", methodName, brookShakeAlgorithm.getContentsString().c_str() );
(void) fflush( getLog() );
}
}
// Shake step
// To Shake or not to Shake
if( brookShakeAlgorithm.getNumberOfConstraints() > 0 ){
......@@ -228,8 +219,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
// diagnostics
if( PrintOn && getLog() ){
(void) fprintf( getLog(), "\nPost kupdate_md_verlet: particleStrW=%3d step=%.5f",
getVerletDynamicsParticleStreamWidth(), getStepSize() );
(void) fprintf( getLog(), "\n%s Post kupdate_md_verlet: particleStrW=%3d step=%.5f",
methodName.c_str(), getVerletDynamicsParticleStreamWidth(), getStepSize() );
(void) fprintf( getLog(), "\nInverseMassStream\n" );
getInverseMassStream()->printToFile( getLog() );
......@@ -253,7 +244,6 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
kshakeh_fix1(
(float) brookShakeAlgorithm.getMaxIterations(),
(float) getVerletDynamicsParticleStreamWidth(),
brookShakeAlgorithm.getInverseHydrogenMass(),
brookShakeAlgorithm.getShakeTolerance(),
brookShakeAlgorithm.getShakeParticleIndicesStream()->getBrookStream(),
positionStream.getBrookStream(),
......@@ -266,8 +256,7 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
if( (1|| PrintOn) && getLog() ){
(void) fprintf( getLog(), "\nPost kshakeh_fix1: particleStrW=%3d invMass_H=%.5f",
getVerletDynamicsParticleStreamWidth(), brookShakeAlgorithm.getInverseHydrogenMass() );
(void) fprintf( getLog(), "\n%s Post kshakeh_fix1: particleStrW=%3d", methodName.c_str(), getVerletDynamicsParticleStreamWidth() );
(void) fprintf( getLog(), "\nShakeParticleIndicesStream\n" );
brookShakeAlgorithm.getShakeParticleIndicesStream()->printToFile( getLog() );
......@@ -311,8 +300,8 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
if( ( 1 || PrintOn) && getLog() ){
(void) fprintf( getLog(), "\nPost kshakeh_update2_fix1: particleStrW=%3d",
getVerletDynamicsParticleStreamWidth() );
(void) fprintf( getLog(), "\n%s Post kshakeh_update2_fix1: particleStrW=%3d",
methodName.c_str(), getVerletDynamicsParticleStreamWidth() );
(void) fprintf( getLog(), "\nShakeInverseMapStream\n" );
brookShakeAlgorithm.getShakeInverseMapStream()->printToFile( getLog() );
......@@ -339,15 +328,33 @@ int BrookVerletDynamics::update( BrookStreamImpl& positionStream, BrookStreamImp
}
// second integration step
kupdate_md2( (float) getStepSize(),
float inverseStepSize = 1.0f/getStepSize();
kupdate_md2( inverseStepSize,
getXPrimeStream()->getBrookStream(),
positionStream.getBrookStream(),
velocityStream.getBrookStream(),
positionStream.getBrookStream() );
if( ( 1 || PrintOn) && getLog() ){
(void) fprintf( getLog(), "\n%s Post kupdate_md2: inverseStepSize=%3e",
methodName.c_str(), inverseStepSize );
BrookStreamInternal* brookStreamInternalPos = positionStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nPositionStream\n" );
brookStreamInternalPos->printToFile( getLog() );
(void) fprintf( getLog(), "\nXPrimeStream\n" );
getXPrimeStream()->printToFile( getLog() );
brookStreamInternalPos = velocityStream.getBrookStreamImpl();
(void) fprintf( getLog(), "\nVelocityStream\n" );
brookStreamInternalPos->printToFile( getLog() );
}
} else {
//ksetStr3( getXPrimeStream()->getBrookStream(), positionStream.getBrookStream() );
float inverseStepSize = 1.0f/getStepSize();
kupdateMdNoShake( inverseStepSize,
kupdateMdNoShake( getStepSize(),
positionStream.getBrookStream(),
velocityStream.getBrookStream(),
forceStream.getBrookStream(),
......
......@@ -102,15 +102,6 @@ kernel void loop2Internal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jA
}
/****************************************************************
* This file is part of the gpu acceleration library for gromacs.
* Author: Mark Friedrichs
*
* This kernel was developed in collaboration with
*
* Copyright (C) Pande Group, Stanford, 2006
*****************************************************************/
kernel void loop2InternalR2( float4 r2, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<>, out float4 bornSum<> ){
......@@ -355,196 +346,6 @@ kernel void bornSumInternal( float3 d1, float3 d2, float3 d3, float3 d4, float4
//60
}
// This kernel includes terms dL/dr & dU/dr from paper
// these values are zero, but included here to confirm that is the case
kernel void loop2InternalX( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<>, out float4 bornSum<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2, r2Inverse;
float4 l_ij, l_ij2, l_ij3;
float4 u_ij, u_ij2, u_ij3;
float4 t2,t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
float4 diff_u2_l2;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask, t2Mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
mask = r2 < smallValue4 ? zero4 : one4;
// r2 = r2 < smallValue4 ? bigValue4 : r2;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 );
rInverse = rsqrt( r2 );
r2Inverse = rInverse*rInverse;
rPlusScaledJ = r + jAtomicRadiiScaled;
rMinusScaledJ = r - jAtomicRadiiScaled;
rMinusScaledJAbs = abs( rMinusScaledJ );
// ---------------------------------------------------------------------------------------
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij;
l_ij2 = l_ij*l_ij;
u_ij = 1.0f/rPlusScaledJ;
u_ij2 = u_ij*u_ij;
diff_u2_l2 = u_ij2 - l_ij2;
scaledRadiusJ2 = jAtomicRadiiScaled*jAtomicRadiiScaled;
logRatio = log(u_ij/l_ij);
// terms associated (t2) w/ dL/dr & dU/dr are zero; included here to test that true
// loop2Internal() should be used
t3 = -0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*diff_u2_l2 + 0.25f*logRatio*r2Inverse;
l_ij3 = l_ij2*l_ij;
// t1 in Tinker code
t2 = 0.5f*l_ij2 + 0.25f*scaledRadiusJ2*l_ij3/r - 0.25f*(l_ij/r + l_ij3*r);
t2Mask = iAtomicRadii4 < rMinusScaledJ ? t2 : zero4;
t3 += t2Mask;
// t2 in Tinker code
u_ij3 = u_ij2*u_ij;
t2 = -0.5f*u_ij2 - 0.25f*scaledRadiusJ2*u_ij3/r + 0.25f*(u_ij/r + u_ij3*r);
t3 += t2;
de = mask*bornForce*t3*rInverse;
// Born radius term
// bornSum = l_ij - u_ij + 0.25*r*diff_u2_l2 + 0.5f*rInverse*logRatio - (0.25*jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
bornSum = l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
t3 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4;
bornSum += t3;
bornSum *= mask;
}
/* This is a copy of loop2Internal(), but w/ the calculation of bornSum removed */
kernel void loop2InternalNoSumX( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2, r2Inverse;
float4 l_ij, l_ij2, l_ij3;
float4 u_ij, u_ij2, u_ij3;
float4 t2, t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask, t2Mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// mask = r2 < smallValue4 ? zero4 : one4;
// r2 = r2 < smallValue4 ? bigValue4 : r2;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 );
rInverse = rsqrt( r2 );
r2Inverse = rInverse*rInverse;
rPlusScaledJ = r + jAtomicRadiiScaled;
rMinusScaledJ = r - jAtomicRadiiScaled;
rMinusScaledJAbs = abs( rMinusScaledJ );
// ---------------------------------------------------------------------------------------
// mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : one4;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij;
l_ij2 = l_ij*l_ij;
u_ij = 1.0f/rPlusScaledJ;
u_ij2 = u_ij*u_ij;
scaledRadiusJ2 = jAtomicRadiiScaled*jAtomicRadiiScaled;
logRatio = log(u_ij/l_ij);
// terms associated w/ dL/dr & dU/dr are zero and not included
t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse;
l_ij3 = l_ij2*l_ij;
// t1 in Tinker code
t2 = 0.5f*l_ij2 + 0.25f*scaledRadiusJ2*l_ij3/r - 0.25f*(l_ij/r + l_ij3*r);
t2Mask = iAtomicRadii4 < rMinusScaledJ ? t2 : zero4;
t3 += t2Mask;
// t2 in Tinker code
u_ij3 = u_ij2*u_ij;
t2 = -0.5f*u_ij2 - 0.25f*scaledRadiusJ2*u_ij3/r + 0.25f*(u_ij/r + u_ij3*r);
t3 += t2;
de = mask*bornForce*t3*rInverse;
}
/* ---------------------------------------------------------------------------------------
Calculate Loop 2 OBC forces (Simbios)
......@@ -971,8 +772,8 @@ kernel float4 scalar_force_CDLJMerge( float4 qq, float epsfac, float4 sig, float
//???? FLOPS
kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth, float4 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][],
float streamWidth, float fstreamWidth, float3 posq[][],
float2 scaledAtomicRadii[][],
out float4 bornForce<> ){
// ---------------------------------------------------------------------------------------
......@@ -980,14 +781,11 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
float4 jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
......@@ -1043,42 +841,29 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
// ---------------------------------------------------------------------------------------
// etch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ].xyz;
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch position, (atomic radius - dielectric offset )
// etch i2 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ];
i1AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtom.x += 1;
i2pos = posq[ iAtom ].xyz;
i2AtomicRadii = atomicRadii[ iAtom ];
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
// etch i3 position, Born prefactor, atomic radius
i2pos = posq[ iAtom ];
i2AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtom.x += 1;
i3pos = posq[ iAtom ].xyz;
i3AtomicRadii = atomicRadii[ iAtom ];
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
// etch i4 position, Born prefactor, atomic radius
i3pos = posq[ iAtom ];
i3AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtom.x += 1;
i4pos = posq[ iAtom ].xyz;
i4AtomicRadii = atomicRadii[ iAtom ];
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
// create float4 for main vars
i4pos = posq[ iAtom ];
i4AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
bornForce = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// inner loop indices setup
//changed the following instruction for rounding issues on some ASICs
//whichRep = floor( forceIndex / roundedUpAtoms );
tmp = fmod(forceIndex, roundedUpAtoms);
whichRep = round((forceIndex - tmp)/roundedUpAtoms);
......@@ -1101,29 +886,22 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
// gather required values
j1AtomicRadii = atomicRadii[ jAtom ];
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j1pos = posq[ jAtom ].xyz;
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j1pos = posq[ jAtom ];
jAtom.x += 1.0f;
j2AtomicRadii = atomicRadii[ jAtom ];
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j2pos = posq[ jAtom ].xyz;
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j2pos = posq[ jAtom ];
jAtom.x += 1.0f;
j3AtomicRadii = atomicRadii[ jAtom ];
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j3pos = posq[ jAtom ].xyz;
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j3pos = posq[ jAtom ];
jAtom.x += 1.0f;
j4AtomicRadii = atomicRadii[ jAtom ];
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j4pos = posq[ jAtom ].xyz;
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j4pos = posq[ jAtom ];
jAtom.x += 1.0f;
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
// ---------------------------------------------------------------------------------------
......@@ -1190,1212 +968,3 @@ kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, floa
jAtom.y += 1.0f;
}
}
//???? FLOPS
kernel void kCalculateBornRadiiOk( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth, float4 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][],
float bornForceFactor[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
//float4 iBornForceFactor;
//float4 jBornForceFactor;
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
float tmp;
float tmp2;
// ---------------------------------------------------------------------------------------
// constants
const float I_Unroll = 4.0f;
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// etch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ].xyz;
i1BornForceFactor = bornForceFactor[ iAtom ];
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i2 position, Born prefactor, atomic radius
iAtom.x += 1;
i2pos = posq[ iAtom ].xyz;
i2BornForceFactor = bornForceFactor[ iAtom ];
i2AtomicRadii = atomicRadii[ iAtom ];
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i3 position, Born prefactor, atomic radius
iAtom.x += 1;
i3pos = posq[ iAtom ].xyz;
i3BornForceFactor = bornForceFactor[ iAtom ];
i3AtomicRadii = atomicRadii[ iAtom ];
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i4 position, Born prefactor, atomic radius
iAtom.x += 1;
i4pos = posq[ iAtom ].xyz;
i4BornForceFactor = bornForceFactor[ iAtom ];
i4AtomicRadii = atomicRadii[ iAtom ];
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
bornForce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// create float4 for main vars
//iBornForceFactor = float4( i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor );
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup
//changed the following instruction for rounding issues on some ASICs
//whichRep = floor( forceIndex / roundedUpAtoms );
tmp = fmod(forceIndex, roundedUpAtoms);
whichRep = round((forceIndex - tmp)/roundedUpAtoms);
//jBlock = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
jBlock = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
j1BornForceFactor = bornForceFactor[ jAtom ];
j1AtomicRadii = atomicRadii[ jAtom ];
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j1pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
j2BornForceFactor = bornForceFactor[ jAtom ];
j2AtomicRadii = atomicRadii[ jAtom ];
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j2pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
j3BornForceFactor = bornForceFactor[ jAtom ];
j3AtomicRadii = atomicRadii[ jAtom ];
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j3pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
j4BornForceFactor = bornForceFactor[ jAtom ];
j4AtomicRadii = atomicRadii[ jAtom ];
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j4pos = posq[ jAtom ].xyz;
jAtom.x += 1.0f;
//jBornForceFactor = float4( j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor );
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
d1 = j1pos - i1pos; //3
d2 = j2pos - i1pos; //3
d3 = j3pos - i1pos; //3
d4 = j4pos - i1pos; //3
// i = 1
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, bornSum ); //156 : 368
bornForce1.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 2
d1 = j1pos - i2pos; //3
d2 = j2pos - i2pos; //3
d3 = j3pos - i2pos; //3
d4 = j4pos - i2pos; //3
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, bornSum );//156 : 368
bornForce2.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 3
d1 = j1pos - i3pos; //3
d2 = j2pos - i3pos; //3
d3 = j3pos - i3pos; //3
d4 = j4pos - i3pos; //3
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, bornSum );//156 : 368
bornForce3.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 4
d1 = j1pos - i4pos; //3
d2 = j2pos - i4pos; //3
d3 = j3pos - i4pos; //3
d4 = j4pos - i4pos; //3
// bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, bornSum );//156 : 366
bornForce4.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
/* ---------------------------------------------------------------------------------------
Calculate Loop 2 OBC forces (Simbios)
numberOfAtoms: no. of atoms
roundedUpAtoms: rounded up number of atoms based on unroll
duplicationFactor: ?
strheight: atom stream height
streamWidth: atom stream width
fstreamWidth: force stream width (output -- i-unroll)
posq: atom positions and charge
atomicRadii: atomic radii
scaledAtomicRadii: scaled atomic radii
bornForceFactor: (Born radii)**2*bornForce*Obcforce
bornForce1: i-unroll first force component, including dBorn_r/dr in .w
bornForce2: i-unroll second force component, including dBorn_r/dr in .w
bornForce3: i-unroll third force component, including dBorn_r/dr in .w
bornForce4: i-unroll fourth force component, including dBorn_r/dr in .w
#endif
--------------------------------------------------------------------------------------- */
kernel void kObcLoop2Cdlj4( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth,
float jstreamWidth, float epsfac, float4 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][],
float bornForceFactor[][], float4 isigeps[][],
float4 sigma[][], float4 epsilon[][], float excl[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
//float4 iBornForceFactor;
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
float i1q, i2q, i3q, i4q;
float i1sig, i2sig, i3sig, i4sig;
float i1eps, i2eps, i3eps, i4eps;
float j1q, j2q, j3q, j4q;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2, r2Masked;
float3 pad;
float4 exclmask;
float2 iatom;
float2 jstrindex;
float linind;
float4 jsig, jeps;
float4 jQ,qq;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, fs, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
// ---------------------------------------------------------------------------------------
// constants
const float I_Unroll = 4.0f;
const float4 exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// fetch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ].xyz;
i1q = posq[ iAtom ].w;
i1BornForceFactor = bornForceFactor[ iAtom ];
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i1sig = jstrindex.x;
i1eps = jstrindex.y;
bornForce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i2 position, Born prefactor, atomic radius
iAtom.x += 1;
i2pos = posq[ iAtom ].xyz;
i2q = posq[ iAtom ].w;
i2BornForceFactor = bornForceFactor[ iAtom ];
i2AtomicRadii = atomicRadii[ iAtom ];
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i2sig = jstrindex.x;
i2eps = jstrindex.y;
bornForce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i3 position, Born prefactor, atomic radius
iAtom.x += 1;
i3pos = posq[ iAtom ].xyz;
i3q = posq[ iAtom ].w;
i3BornForceFactor = bornForceFactor[ iAtom ];
i3AtomicRadii = atomicRadii[ iAtom ];
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i3sig = jstrindex.x;
i3eps = jstrindex.y;
bornForce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i4 position, Born prefactor, atomic radius
iAtom.x += 1;
i4pos = posq[ iAtom ].xyz;
i4q = posq[ iAtom ].w;
i4BornForceFactor = bornForceFactor[ iAtom ];
i4AtomicRadii = atomicRadii[ iAtom ];
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i4sig = jstrindex.x;
i4eps = jstrindex.y;
bornForce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// create float4 for main vars
//iBornForceFactor = float4( i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor );
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup
whichRep = floor( forceIndex / roundedUpAtoms );
jBlock = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
exclind.x = iAtomLinearIndex;
exclind.y = jStart*streamWidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jAtom.x + jAtom.y*streamWidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstreamWidth))/jstreamWidth);
jstrindex.x = linind - jstrindex.y*jstreamWidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
j1BornForceFactor = bornForceFactor[ jAtom ];
j1AtomicRadii = atomicRadii[ jAtom ];
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j1pos = posq[ jAtom ].xyz;
j1q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j2BornForceFactor = bornForceFactor[ jAtom ];
j2AtomicRadii = atomicRadii[ jAtom ];
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j2pos = posq[ jAtom ].xyz;
j2q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j3BornForceFactor = bornForceFactor[ jAtom ];
j3AtomicRadii = atomicRadii[ jAtom ];
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j3pos = posq[ jAtom ].xyz;
j3q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j4BornForceFactor = bornForceFactor[ jAtom ];
j4AtomicRadii = atomicRadii[ jAtom ];
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j4pos = posq[ jAtom ].xyz;
j4q = posq[ jAtom ].w;
jAtom.x += 1.0f;
//jBornForceFactor = float4( j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor );
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
jQ = float4( j1q, j2q, j3q, j4q );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
sig = i1sig + jsig;
eps = i1eps * jeps;
qq = i1q*jQ;
d1 = j1pos - i1pos;
d2 = j2pos - i1pos;
d3 = j3pos - i1pos;
d4 = j4pos - i1pos;
// i = 1
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce1.xyz += de.x*d1;
bornForce1.xyz += de.y*d2;
bornForce1.xyz += de.z*d3;
bornForce1.xyz += de.w*d4;
bornForce1.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// i = 2
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = i2sig + jsig;
eps = i2eps * jeps;
qq = i2q*jQ;
d1 = j1pos - i2pos;
d2 = j2pos - i2pos;
d3 = j3pos - i2pos;
d4 = j4pos - i2pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce2.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce2.xyz += de.z*d3;
bornForce2.xyz += de.w*d4;
bornForce2.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// i = 3
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = i3sig + jsig;
eps = i3eps * jeps;
qq = i3q*jQ;
d1 = j1pos - i3pos;
d2 = j2pos - i3pos;
d3 = j3pos - i3pos;
d4 = j4pos - i3pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce3.xyz += de.x*d1;
bornForce3.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce3.xyz += de.w*d4;
bornForce3.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// i = 4
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = i4sig + jsig;
eps = i4eps * jeps;
qq = i4q*jQ;
d1 = j1pos - i4pos;
d2 = j2pos - i4pos;
d3 = j3pos - i4pos;
d4 = j4pos - i4pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, de, bornSum );
de += fs;
//de = fs;
bornForce4.xyz += de.x*d1;
bornForce4.xyz += de.y*d2;
bornForce4.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
bornForce4.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
// ---------------------------------------------------------------------------------------
// j = 1
d1 = j1pos - i1pos;
d2 = j1pos - i2pos;
d3 = j1pos - i3pos;
d4 = j1pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 2
d1 = j2pos - i1pos;
d2 = j2pos - i2pos;
d3 = j2pos - i3pos;
d4 = j2pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 3
d1 = j3pos - i1pos;
d2 = j3pos - i2pos;
d3 = j3pos - i3pos;
d4 = j3pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 4
d1 = j4pos - i1pos;
d2 = j4pos - i2pos;
d3 = j4pos - i3pos;
d4 = j4pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
/* ---------------------------------------------------------------------------------------
Calculate Loop 2 OBC forces (Simbios)
numberOfAtoms: no. of atoms
roundedUpAtoms: rounded up number of atoms based on unroll
duplicationFactor: ?
strheight: atom stream height
streamWidth: atom stream width
fstreamWidth: force stream width (output -- i-unroll)
posq: atom positions and charge
atomicRadii: atomic radii
scaledAtomicRadii: scaled atomic radii
bornForceFactor: (Born radii)**2*bornForce*Obcforce
bornForce1: i-unroll first force component, including dBorn_r/dr in .w
bornForce2: i-unroll second force component, including dBorn_r/dr in .w
bornForce3: i-unroll third force component, including dBorn_r/dr in .w
bornForce4: i-unroll fourth force component, including dBorn_r/dr in .w
#endif
--------------------------------------------------------------------------------------- */
kernel void kObcLoop2Cdlj4X( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth,
float jstreamWidth, float epsfac, float4 posq[][],
float atomicRadii[][], float scaledAtomicRadii[][],
float bornForceFactor[][], float4 isigeps[][],
float4 sigma[][], float4 epsilon[][], float excl[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
//float4 iBornForceFactor;
//float4 jBornForceFactor;
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
float i1q, i2q, i3q, i4q;
float i1sig, i2sig, i3sig, i4sig;
float i1eps, i2eps, i3eps, i4eps;
float j1q, j2q, j3q, j4q;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2, r2Masked;
float3 pad;
float4 exclmask;
float2 iatom;
float2 jstrindex;
float linind;
float4 jsig, jeps;
float4 jQ,qq;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, fs, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
float sum;
// ---------------------------------------------------------------------------------------
// constants
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 frac4 = float4( 0.2f, 0.2f, 0.2f, 0.2f );
const float I_Unroll = 4.0f;
const float4 exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// fetch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ].xyz;
i1q = posq[ iAtom ].w;
i1BornForceFactor = bornForceFactor[ iAtom ];
i1AtomicRadii = atomicRadii[ iAtom ];
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i1sig = jstrindex.x;
i1eps = jstrindex.y;
bornForce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i2 position, Born prefactor, atomic radius
iAtom.x += 1;
i2pos = posq[ iAtom ].xyz;
i2q = posq[ iAtom ].w;
i2BornForceFactor = bornForceFactor[ iAtom ];
i2AtomicRadii = atomicRadii[ iAtom ];
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i2sig = jstrindex.x;
i2eps = jstrindex.y;
bornForce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i3 position, Born prefactor, atomic radius
iAtom.x += 1;
i3pos = posq[ iAtom ].xyz;
i3q = posq[ iAtom ].w;
i3BornForceFactor = bornForceFactor[ iAtom ];
i3AtomicRadii = atomicRadii[ iAtom ];
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i3sig = jstrindex.x;
i3eps = jstrindex.y;
bornForce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// fetch i4 position, Born prefactor, atomic radius
iAtom.x += 1;
i4pos = posq[ iAtom ].xyz;
i4q = posq[ iAtom ].w;
i4BornForceFactor = bornForceFactor[ iAtom ];
i4AtomicRadii = atomicRadii[ iAtom ];
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ];
jstrindex = isigeps[ iAtom ];
i4sig = jstrindex.x;
i4eps = jstrindex.y;
bornForce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// create float4 for main vars
//iBornForceFactor = float4( i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor );
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup
whichRep = floor( forceIndex / roundedUpAtoms );
jBlock = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
exclind.x = iAtomLinearIndex;
exclind.y = jStart*streamWidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jAtom.x + jAtom.y*streamWidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstreamWidth))/jstreamWidth);
jstrindex.x = linind - jstrindex.y*jstreamWidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
j1BornForceFactor = bornForceFactor[ jAtom ];
j1AtomicRadii = atomicRadii[ jAtom ];
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j1pos = posq[ jAtom ].xyz;
j1q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j2BornForceFactor = bornForceFactor[ jAtom ];
j2AtomicRadii = atomicRadii[ jAtom ];
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j2pos = posq[ jAtom ].xyz;
j2q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j3BornForceFactor = bornForceFactor[ jAtom ];
j3AtomicRadii = atomicRadii[ jAtom ];
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j3pos = posq[ jAtom ].xyz;
j3q = posq[ jAtom ].w;
jAtom.x += 1.0f;
j4BornForceFactor = bornForceFactor[ jAtom ];
j4AtomicRadii = atomicRadii[ jAtom ];
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ];
j4pos = posq[ jAtom ].xyz;
j4q = posq[ jAtom ].w;
jAtom.x += 1.0f;
//jBornForceFactor = float4( j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor );
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
jQ = float4( j1q, j2q, j3q, j4q );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
/*
sig = i1sig + jsig;
eps = i1eps * jeps;
qq = i1q*jQ;
d1 = j1pos - i1pos;
d2 = j2pos - i1pos;
d3 = j3pos - i1pos;
d4 = j4pos - i1pos;
// i = 1
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce1.xyz += de.x*d1;
bornForce1.xyz += de.y*d2;
bornForce1.xyz += de.z*d3;
bornForce1.xyz += de.w*d4;
bornForce1.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x ) + floor( exclmask.y ) + floor( exclmask.z ) + floor( exclmask.w );
if( sum > 0.25f ){
//if( exclind.y < 4.9f && exclind.y > 3.1f ){
//if( exclind.y < 0.9f ){
//if( exclind.y < 1.9f && exclind.y > 0.1f ){
//if( exclind.y < 2.9f && exclind.y > 1.1f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce1.x += 1.0f;
bornForce1.y += exclusions;
bornForce1.z += sum;
bornForce1.w += exclmask.w;
//bornForce1 = exclmask;
}
// ---------------------------------------------------------------------------------------
// i = 2
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce2.x += 1.0f;
bornForce2.y += exclusions;
bornForce2.z += sum;
bornForce2.w += exclind.y;
}
/*
sig = i2sig + jsig;
eps = i2eps * jeps;
qq = i2q*jQ;
d1 = j1pos - i2pos;
d2 = j2pos - i2pos;
d3 = j3pos - i2pos;
d4 = j4pos - i2pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce2.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce2.xyz += de.z*d3;
bornForce2.xyz += de.w*d4;
bornForce2.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
// ---------------------------------------------------------------------------------------
// i = 3
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce3.x += 1.0f;
bornForce3.y += exclusions;
bornForce3.z += sum;
bornForce3.w += exclind.y;
}
/*
sig = i3sig + jsig;
eps = i3eps * jeps;
qq = i3q*jQ;
d1 = j1pos - i3pos;
d2 = j2pos - i3pos;
d3 = j3pos - i3pos;
d4 = j4pos - i3pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce3.xyz += de.x*d1;
bornForce3.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce3.xyz += de.w*d4;
bornForce3.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
// ---------------------------------------------------------------------------------------
// i = 4
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
bornForce4.x += 1.0f;
bornForce4.y += exclusions;
bornForce4.z += sum;
bornForce4.w += exclind.y;
}
/*
sig = i4sig + jsig;
eps = i4eps * jeps;
qq = i4q*jQ;
d1 = j1pos - i4pos;
d2 = j2pos - i4pos;
d3 = j3pos - i4pos;
d4 = j4pos - i4pos;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2Masked = r2 + exclmask*10000.0f;
fs = scalar_force_CDLJMerge( qq, epsfac, sig, eps, r2Masked );
loop2InternalR2( r2, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, de, bornSum );
//de += fs;
de = fs;
bornForce4.xyz += de.x*d1;
bornForce4.xyz += de.y*d2;
bornForce4.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
bornForce4.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w;
*/
// ---------------------------------------------------------------------------------------
/*
// j = 1
d1 = j1pos - i1pos;
d2 = j1pos - i2pos;
d3 = j1pos - i3pos;
d4 = j1pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 2
d1 = j2pos - i1pos;
d2 = j2pos - i2pos;
d3 = j2pos - i3pos;
d4 = j2pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 3
d1 = j3pos - i1pos;
d2 = j3pos - i2pos;
d3 = j3pos - i3pos;
d4 = j3pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
// ---------------------------------------------------------------------------------------
// j = 4
d1 = j4pos - i1pos;
d2 = j4pos - i2pos;
d3 = j4pos - i3pos;
d4 = j4pos - i4pos;
// loop2Internal( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de, bornSum );
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de );
bornForce1.xyz += de.x*d1;
bornForce2.xyz += de.y*d2;
bornForce3.xyz += de.z*d3;
bornForce4.xyz += de.w*d4;
*/
// ---------------------------------------------------------------------------------------
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
......@@ -502,7 +502,6 @@ void kCalculateBornRadii(
float streamWidth,
float fstreamWidth,
::brook::stream posq,
::brook::stream atomicRadii,
::brook::stream scaledAtomicRadii,
::brook::stream bornForce1
);
......
......@@ -462,75 +462,6 @@ kernel void kAddAndMergeFloat4(
}
}
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
/*
kernel void kAddAndMergeFloat4_4(
float repfac,
float atomStrWidth,
float pstreamStrWidth,
float natoms,
float iUnroll,
float4 inStream<>,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
out float4 outstream<> )
{
float linind;
float2 pindex;
float odd;
float i;
float floor_linind_iUnroll;
linind = (indexof outstream).x + (indexof outstream).y * atomStrWidth;
//If odd or even, we pick from diferent streams.
//odd = linind - floor( linind / iUnroll ) * iUnroll;
//Now linear index is the index into partial_streams
//linind = floor( linind / iUnroll );
floor_linind_iUnroll = round( (linind - fmod(linind, iUnroll))/iUnroll );
odd = linind - floor_linind_iUnroll * iUnroll;//bixia modify
linind = floor_linind_iUnroll; //bixia modify
outstream = inStream;
outstream.w = 0.0f;
// outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
//If we have predicated conditionals, we should
//keep the conditional inside the loop
for ( i = 0.0f; i < repfac; i+= 1.0f ) {
//pindex.y = floor( linind / pstreamStrWidth );
pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth ); //bixia modify
pindex.x = linind - pindex.y * pstreamStrWidth;
if ( odd < 0.5f ) { //is odd
outstream += pstream1[ pindex ];
} else if( odd < 1.5f ){
outstream += pstream2[ pindex ];
} else if( odd < 2.5f ){
outstream += pstream3[ pindex ];
} else {
outstream += pstream4[ pindex ];
}
linind += natoms/iUnroll;
}
} */
kernel void kAddAndMergeFloat4_4(
float repfac,
float atomStreamWidth,
......@@ -939,158 +870,6 @@ kernel void kPostCalculateBornRadii_nobranch(
}
kernel void kPostCalculateBornRadii_nobranchOk(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float conversion,
float mergeNonObcForces,
float4 inObcForces<>,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
float atomicRadii<>,
out float bornRadii<>,
out float obcChain<> ){
// ---------------------------------------------------------------------------------------
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
float sum2, sum3, bornSum, tanhSum, atomicRadiiOffset, obcIntermediate;
float4 o1,o2,o3,o4;
float4 tmp;
float2 iAtom;
float4 forces;
float expPlus, expMinus;
// ---------------------------------------------------------------------------------------
// constants -- OBC Type II
const float alphaObc = 1.0f;
const float betaObc = 0.8f;
const float gammaObc = 4.85f;
const float dielectricOffset = 0.009f;
// ---------------------------------------------------------------------------------------
// given atom index find force indices and streams
pindex = indexof( obcChain );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
// add current forces in inStream to forces stored in pstreams
// the .w entry is Born sum values; it will be used to calculate the
// Born radii and obcChain term
forces = inObcForces;
forces.w = 0.0f;
//forces = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// sum over j-loop 'duplications' by gathering from pstreams
for( i = 0.0f; i < repfac; i += 1.0f ){
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
pindex.x = qIndex - pindex.y*pStreamWidth;
o1 = pstream1[ pindex ];
o2 = pstream2[ pindex ];
o3 = pstream3[ pindex ];
o4 = pstream4[ pindex ];
tmp = qOff < 0.5f ? o1 : o2;
tmp = qOff < 1.5f ? tmp : o3;
tmp = qOff < 2.5f ? tmp : o4;
forces += tmp;
forceIndex += roundNatoms;
}
// compute Born radii and ObcChain
atomicRadiiOffset = atomicRadii - dielectricOffset;
bornSum = forces.w;
bornSum *= 0.5f*atomicRadiiOffset;
sum2 = bornSum*bornSum;
sum3 = bornSum*sum2;
// Tanh does not exist?
// calculate [ exp(x) - exp(-x) ]/[ exp(x) + exp(-x) ]
// tanhSum = tanh( bornSum - betaObc*sum2 + gammaObc*sum3 );
tanhSum = bornSum - betaObc*sum2 + gammaObc*sum3;
expPlus = exp( tanhSum );
expMinus = 1.0f/expPlus;
tanhSum = ( expPlus - expMinus )/( expPlus + expMinus );
bornRadii = 1.0f/( (1.0f/(atomicRadiiOffset)) - tanhSum/atomicRadii );
obcIntermediate = atomicRadiiOffset*( alphaObc - 2.0f*betaObc*bornSum + 3.0f*gammaObc*sum2 );
obcChain = (1.0f - tanhSum*tanhSum)*obcIntermediate/atomicRadii;
if( atomIndex >= natoms ){
bornRadii = 0.0f;
obcChain = 0.0f;
}
}
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel void kPreGbsaForce2(
float4 intermediateForceIn<>,
float bornRadii<>,
out float bornRadii2Force<> ){
// ---------------------------------------------------------------------------------------
// float P4 = 15.236f;
// P4_ec = P4/electricConstant
const float P4_ec = -0.09176f;
bornRadii2Force = P4_ec*bornRadii*bornRadii*intermediateForceIn.w;
}
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel void kPreObcForce2(
float4 intermediateForceIn<>,
float4 obcChain<>,
float bornRadii<>,
out float bornRadii2Force<> ){
// ---------------------------------------------------------------------------------------
bornRadii2Force = obcChain*bornRadii*bornRadii*intermediateForceIn.w;
}
/* Add forces from two streams */
kernel void kAddForces3_4( float conversion, float3 force1<>, float4 force2<>, out float3 outForce<> ){
......@@ -1114,30 +893,3 @@ kernel void kCopyFloat3To4(
outForce.xyz = inForce;
outForce.w = 0.0f;
}
/* ---------------------------------------------------------------------------------------
Calculate Born radius from bonded and nonbonded Gpol
gpolNonBonded value is in gpolNonBonded.w
--------------------------------------------------------------------------------------- */
kernel void kBornRadii( float4 gpolNonBonded<>, float gpolFixed<>, out float bornRadius<> ){
// ---------------------------------------------------------------------------------------
// constants
const float electricConstant = -166.02691f;
// 0.25*P4
const float P4_25 = 3.81575f;
// ---------------------------------------------------------------------------------------
bornRadius = gpolFixed + P4_25*gpolNonBonded.w;
bornRadius = electricConstant/bornRadius;
// ---------------------------------------------------------------------------------------
}
......@@ -52,12 +52,11 @@ kernel void
kshakeh_fix1(
float maxIterations, //number of iterations
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float inputTolerance, //tolerance
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float4 params<>, // (1/m0, mu/2, blensq, 0.0f )
float4 params<>, // (1/m0, mu/2, blensq, 1/m1 )
out float3 cposq0<>, //constrained position for heavy atom
out float3 cposq1<>, //ditto for h1
out float3 cposq2<>, //ditto for h2
......@@ -88,7 +87,6 @@ kshakeh_fix1(
//or nans/infs in the calcs.
if( atoms.z > -0.5f ){
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
......@@ -98,7 +96,6 @@ kshakeh_fix1(
}
if( atoms.w > -0.5f ){
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
......@@ -129,10 +126,10 @@ kshakeh_fix1(
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
i = 0.0f;
......@@ -152,7 +149,7 @@ kshakeh_fix1(
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
xpj1 -= dr * params.w;
//Second hydrogen
......@@ -167,7 +164,7 @@ kshakeh_fix1(
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
xpj2 -= dr * params.w;
//Third hydrogen
......@@ -181,7 +178,7 @@ kshakeh_fix1(
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * invmH;
xpj3 -= dr * params.w;
i += 1.0f;
}
......@@ -195,7 +192,7 @@ kshakeh_fix1(
}
kernel void kshakeh_update2_fix1(
kernel void kshakeh_update1_fix1(
float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions
......@@ -208,26 +205,25 @@ kernel void kshakeh_update2_fix1(
){
float2 atom;
// atom.y = floor( invmap.x / strwidth );
atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
atom.x = invmap.x - atom.y * strwidth;
// oposq = posq;
if ( invmap.y < 0 ){
oposq = posqp - posq;
oposq = posqp;
} else if ( invmap.y < 0.5f ){
oposq = cposq0[ atom ];
} else if ( invmap.y < 1.5f){
oposq = cposq1[ atom ];
oposq += cposq0[ atom ];
} else if ( invmap.y < 1.5f ){
oposq += cposq1[ atom ];
} else if ( invmap.y < 2.5f ){
oposq = cposq2[ atom ];
oposq += cposq2[ atom ];
} else if ( invmap.y < 3.5f ){
oposq = cposq3[ atom ];
oposq += cposq3[ atom ];
}
oposq += posq;
}
kernel void kshakeh_update1_fix1(
kernel void kshakeh_update2_fix1(
float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions
......@@ -238,23 +234,23 @@ kernel void kshakeh_update1_fix1(
float3 cposq3[][], //ditto for h3
out float3 oposq<> //updated deltas
){
float2 atom;
// atom.y = floor( invmap.x / strwidth );
atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
atom.x = invmap.x - atom.y * strwidth;
oposq = posq;
if ( invmap.y < 0 ){
oposq = posqp;
} else if ( invmap.y < 0.5f ){
oposq += cposq0[ atom ];
} else if ( invmap.y < 1.5f ){
oposq += cposq1[ atom ];
oposq = cposq0[ atom ];
} else if ( invmap.y < 1.5f){
oposq = cposq1[ atom ];
} else if ( invmap.y < 2.5f ){
oposq += cposq2[ atom ];
oposq = cposq2[ atom ];
} else if ( invmap.y < 3.5f ){
oposq += cposq3[ atom ];
oposq = cposq3[ atom ];
}
oposq += posq;
}
......@@ -32,7 +32,6 @@
void kshakeh_fix1 (
const float maxIterations,
const float strwidth,
const float invmH,
const float tolerance,
::brook::stream atoms,
::brook::stream posq,
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
......@@ -31,6 +30,20 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
//Applies a permutation to the given random vector stream
//Totally bandwidth bound and streams are large, should be done infrequently.
//Probably don't want gvin and gvout to be the same stream!
kernel void kpermute_vectors( float gstrwidth, float perm<>,
float3 gvin[][], out float3 gvout<> ){
float2 ind;
//ind.y = floor( perm / gstrwidth );
ind.y = round( (perm - fmod( perm, gstrwidth ))/gstrwidth );
ind.x = perm - ind.y * gstrwidth;
gvout = gvin[ ind ];
}
/* Stochastic Integrator
*
* Since we don't have random numbers on the GPU, we
......@@ -56,139 +69,6 @@
* constants that use it.
* */
/*
kernel void kupdate_sd1(
float xstrwidth, //atom stream width
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
//sd constants, prefixed with c
//to avoid 1 letter names, cxx = sdc[0].xx
float cem,
//sd precalculated constants
float pc1, //tau_t[0] * ( 1 - sdc[0].em )
float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh)
float pc3, //sdc[0].d/(tau_t * sdc[0].c);
//sdpc = sd precalculated per atom values
//sdpc.x = 1/sqrt(m)*sig[0].Yv
//sdpc.y = 1/sqrt(m)*sig[0].V
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd2X<>, //This is calculated in sd2 of the previous step
float4 posq<>,
float3 f<>,
float3 v<>,
float invmass<>,
out float3 sd1V<>, //This is V in gromacs, reused in sd2
out float3 vnew<>,
out float4 posqp<>
){
float3 Vmh;
float3 fg1, fg2;
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
//2 random vectors used in each fragment
linind_gauss = 2 * linind_gauss + goffset;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
Vmh = sd2X * pc3 + sdpc.x * fg1;
sd1V = sdpc.y * fg2;
vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem;
posqp.w = posq.w;
posqp.xyz = posq.xyz + vnew * pc2;
} */
/*Second part of sd update, after first constraints call*/
/*
kernel void kupdate_sd2(
float xstrwidth, //stream width of atoms
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
float pc1, //this is 1/pc2 for sd1
//= 1/(tau_t*(sdc[0].eph-sdc[0].emh))
float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1)
//per atom precalcs
//sdpc.x = 1/sqrt(m)*sig[0].Yx
//sdpc.y = 1/sqrt(m)*sig[0].X
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd1V<>, //calculated in sd1
float4 posq<>, //positions pre-update
float4 posqp<>, //positions post-constraint
float3 vnew<>, //velocities after sd1
out float3 sd2X<>, //Used in sd1
out float3 v<>, //velocities corrected for constraints
out float4 posqp2<> //Will need to be constrained again
){
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
float3 fg1, fg2;
float3 Xmh;
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
linind_gauss = 2 * linind_gauss + goffset;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
igauss.y = floor( linind_gauss / gstrwidth );
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
v = pc1 * ( posqp.xyz - posq.xyz );
Xmh = sd1V * pc2 + sdpc.x * fg1;
sd2X = sdpc.y * fg2;
posqp2 = posqp;
posqp2.xyz += sd2X - Xmh;
} */
//Applies a permutation to the given random vector stream
//Totally bandwidth bound and streams are large, should be done infrequently.
//Probably don't want gvin and gvout to be the same stream!
kernel void kpermute_vectors( float gstrwidth, float perm<>,
float3 gvin[][], out float3 gvout<> )
{
float2 ind;
//ind.y = floor( perm / gstrwidth );
ind.y = round( (perm - fmod( perm, gstrwidth ))/gstrwidth );
ind.x = perm - ind.y * gstrwidth;
gvout = gvin[ ind ];
}
/*
* Alternative formulation to handle precision better
* In updatesd1, we output only deltaV's. The shake
......@@ -220,14 +100,14 @@ kernel void kupdate_sd1_fix1(
float3 fgauss[][],
float3 sd2X<>, //This is calculated in sd2 of the previous step
float4 posq<>,
float3 posq<>,
float3 f<>,
float3 v<>,
float invmass<>,
out float3 sd1V<>, //This is V in gromacs, reused in sd2
out float3 vnew<>,
out float4 posqp<>
out float3 posqp<>
){
float3 Vmh;
float3 fg1, fg2;
......@@ -236,69 +116,23 @@ kernel void kupdate_sd1_fix1(
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
//2 random vectors used in each fragment
linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth );
linind_gauss = 2 * linind_gauss + goffset;
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
/*
Gromacs code
n = 1 -> ngtc
sdc[n].gdt = dt/tau_t[n];
sdc[n].eph = exp(sdc[n].gdt/2);
sdc[n].emh = exp(-sdc[n].gdt/2);
sdc[n].em = exp(-sdc[n].gdt);
sdc[n].b = sdc[n].gdt*(sqr(sdc[n].eph)-1) - 4*sqr(sdc[n].eph-1);
// 2.15 in paper (C)
sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em;
sdc[n].d = 2 - sdc[n].eph - sdc[n].emh;
Vmh = X[n-start][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c) + ism*sig[gt].Yv*fgauss(&jran);
V[n-start][d] = ism*sig[gt].V*fgauss(&jran);
v[n][d] = vn*sdc[gt].em + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em) +
V[n-start][d] - sdc[gt].em*Vmh;
xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh);
*/
// ---------------------------------------------------------------------------------------------
// float pc3 = sdc[0].d/(tau_t * sdc[0].c);
//sdpc.x = 1/sqrt(m)*sig[0].Yv
Vmh = sd2X * pc3 + sdpc.x * fg1;
// ---------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------
//sdpc.y = 1/sqrt(m)*sig[0].V
sd1V = sdpc.y * fg2;
// ---------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------
//
// float pc1 = tau_t[0] * ( 1 - sdc[0].em )
vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem;
// float pc2 = tau_t[gt] * (sdc[gt].eph - sdc.emh)
posqp = posq;
posqp.xyz += vnew * pc2;
posqp = vnew * pc2;
}
kernel void kupdate_sd2_fix1(
......@@ -320,13 +154,14 @@ kernel void kupdate_sd2_fix1(
float3 fgauss[][],
float3 sd1V<>, //calculated in sd1
float4 posq<>, //positions pre-update
float4 posqp<>, //deltas post constraint
float3 posq<>, //positions pre-update
float3 posqp<>, //deltas post constraint
float3 vnew<>, //velocities after sd1
out float3 sd2X<>, //Used in sd1
out float3 v<>, //velocities corrected for constraints
out float4 posqp2<> //new deltas, need to be constrained again
out float3 posqp2<> //new deltas, need to be constrained again
){
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
float3 fg1, fg2;
......@@ -336,191 +171,17 @@ kernel void kupdate_sd2_fix1(
linind_gauss = igauss.y * xstrwidth + igauss.x;
linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg2 = fgauss[ igauss ];
//v = pc1 * posqp.xyz (!!!!!!!! ?????? !!!!!!!)
v = pc1 * (posqp.xyz - posq.xyz);
Xmh = sd1V * pc2 + sdpc.x * fg1;
sd2X = sdpc.y * fg2;
posqp2 = posqp;
posqp2.xyz += sd2X - Xmh;
}
kernel void kupdate_sd1_fix1_FixedRV(
float xstrwidth, //atom stream width
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
//sd constants, prefixed with c
//to avoid 1 letter names, cxx = sdc[0].xx
float cem,
//sd precalculated constants
float pc1, //tau_t[0] * ( 1 - sdc[0].em )
float pc2, //tau_t[gt] * (sdc[gt].eph - sdc.emh)
float pc3, //sdc[0].d/(tau_t * sdc[0].c);
//sdpc = sd precalculated per atom values
//sdpc.x = 1/sqrt(m)*sig[0].Yv
//sdpc.y = 1/sqrt(m)*sig[0].V
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd2X<>, //This is calculated in sd2 of the previous step
float4 posq<>,
float3 f<>,
float3 v<>,
float invmass<>,
out float3 sd1V<>, //This is V in gromacs, reused in sd2
out float3 vnew<>,
out float4 posqp<>
){
float3 Vmh;
float3 fg1, fg2;
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
float RandomValueVsp = 0.1f;
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
//2 random vectors used in each fragment
linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
//fg1 = fgauss[ igauss ];
fg1 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
//fg2 = fgauss[ igauss ];
fg2 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
/*
Gromacs code
n = 1 -> ngtc
sdc[n].gdt = dt/tau_t[n];
sdc[n].eph = exp(sdc[n].gdt/2);
sdc[n].emh = exp(-sdc[n].gdt/2);
sdc[n].em = exp(-sdc[n].gdt);
sdc[n].b = sdc[n].gdt*(sqr(sdc[n].eph)-1) - 4*sqr(sdc[n].eph-1);
// 2.15 in paper (C)
sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em;
sdc[n].d = 2 - sdc[n].eph - sdc[n].emh;
Vmh = X[n-start][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c) + ism*sig[gt].Yv*fgauss(&jran);
V[n-start][d] = ism*sig[gt].V*fgauss(&jran);
v[n][d] = vn*sdc[gt].em + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em) +
V[n-start][d] - sdc[gt].em*Vmh;
xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh);
*/
// ---------------------------------------------------------------------------------------------
// float pc3 = sdc[0].d/(tau_t * sdc[0].c);
//sdpc.x = 1/sqrt(m)*sig[0].Yv
Vmh = sd2X * pc3 + sdpc.x * fg1;
// ---------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------
//sdpc.y = 1/sqrt(m)*sig[0].V
sd1V = sdpc.y * fg2;
// ---------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------
//
// float pc1 = tau_t[0] * ( 1 - sdc[0].em )
vnew = v * cem + invmass * f * pc1 + sd1V - Vmh * cem;
// float pc2 = tau_t[gt] * (sdc[gt].eph - sdc.emh)
posqp = posq;
posqp.xyz += vnew * pc2;
}
kernel void kupdate_sd2_fix1_FixedRV(
float xstrwidth, //stream width of atoms
float gstrwidth, //stream width of random numbers
float goffset, //starting offset for random numbers
float pc1, //this is 1/pc2 for sd1
//= 1/(tau_t*(sdc[0].eph-sdc[0].emh))
float pc2, //= tau_t * sdc[0].d/(sdc[0].em - 1)
//per atom precalcs
//sdpc.x = 1/sqrt(m)*sig[0].Yx
//sdpc.y = 1/sqrt(m)*sig[0].X
float2 sdpc<>,
//Normally distributed random vectors
float3 fgauss[][],
float3 sd1V<>, //calculated in sd1
float4 posq<>, //positions pre-update
float4 posqp<>, //deltas post constraint
float3 vnew<>, //velocities after sd1
out float3 sd2X<>, //Used in sd1
out float3 v<>, //velocities corrected for constraints
out float4 posqp2<> //new deltas, need to be constrained again
){
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
float3 fg1, fg2;
float3 Xmh;
float RandomValueVsp = 0.1f;
igauss = indexof( posq );
linind_gauss = igauss.y * xstrwidth + igauss.x;
linind_gauss = 2 * linind_gauss + goffset;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
//fg1 = fgauss[ igauss ];
fg1 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
linind_gauss += 1.0f;
//igauss.y = floor( linind_gauss / gstrwidth );
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
// fg2 = fgauss[ igauss ];
fg2 = float3( RandomValueVsp, RandomValueVsp, RandomValueVsp );
//v = pc1 * posqp.xyz (!!!!!!!! ?????? !!!!!!!)
v = pc1 * (posqp.xyz - posq.xyz);
v = pc1 * posqp;
Xmh = sd1V * pc2 + sdpc.x * fg1;
sd2X = sdpc.y * fg2;
posqp2 = posqp;
posqp2.xyz += sd2X - Xmh;
posqp2 = posqp + sd2X - Xmh;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment