Commit 6ccf6169 authored by Peter Eastman's avatar Peter Eastman
Browse files

Removed Brook platform

parent 82e0bd2f
#ifndef OPENMM_BROOK_VERLET_DYNAMCIS_H_
#define OPENMM_BROOK_VERLET_DYNAMCIS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <vector>
#include <set>
#include "BrookStreamImpl.h"
#include "BrookShakeAlgorithm.h"
#include "BrookPlatform.h"
#include "BrookCommon.h"
namespace OpenMM {
/**
*
* Encapsulates stochastic dynamics algorithm
*
*/
class BrookVerletDynamics : public BrookCommon {
public:
/**
* Constructor
*
*/
BrookVerletDynamics( );
/**
* Destructor
*
*/
~BrookVerletDynamics();
/**
* Get step size
*
* @return step size
*/
BrookOpenMMFloat getStepSize( void ) const;
/**
*
* Get array of derived parameters indexed by 'DerivedParameters' enums
*
* @return array
*
*/
const BrookOpenMMFloat* getDerivedParameters( void ) const;
/**
* Get VerletDynamics particle stream width
*
* @return particle stream width
*/
int getVerletDynamicsParticleStreamWidth( void ) const;
/**
* Get VerletDynamics particle stream height
*
* @return particle stream height
*/
int getVerletDynamicsParticleStreamHeight( void ) const;
/**
* Get VerletDynamics particle stream size
*
* @return particle stream size
*/
int getVerletDynamicsParticleStreamSize( void ) const;
/**
* Update parameters
*
* @param step size step size
*
* @return DefaultReturnValue
*
*/
int updateParameters( double stepSize );
/**
* Update
*
* @param positions particle positions
* @param velocities particle velocities
* @param forces particle forces
* @param brookShakeAlgorithm BrookShakeAlgorithm reference
*
* @return DefaultReturnValue
*
*/
int update( BrookStreamImpl& positions, BrookStreamImpl& velocities,
const BrookStreamImpl& forces, BrookShakeAlgorithm& brookShakeAlgorithm );
/**
* Get array of VerletDynamics streams
*
* @return array ofstreams
*
*/
BrookFloatStreamInternal** getStreams( void );
/*
* Setup of VerletDynamics parameters
*
* @param masses particle masses
* @param platform Brook platform
*
* @return ErrorReturnValue value if error, else DefaultReturnValue
*
* */
int setup( const std::vector<double>& masses, const Platform& platform );
/*
* Get contents of object
*
* @param level of dump
*
* @return string containing contents
*
* */
std::string getContentsString( int level = 0 ) const;
/**
* Get V-prime stream
*
* @return V-prime stream
*
*/
BrookFloatStreamInternal* getVPrimeStream( void ) const;
/**
* Get X-prime stream
*
* @return X-prime stream
*
*/
BrookFloatStreamInternal* getXPrimeStream( void ) const;
/**
* Get inverse sqrt masses
*
* @return inverse sqrt masses stream
*
*/
BrookFloatStreamInternal* getInverseMassStream( void ) const;
private:
// streams indices
enum BrookVerletDynamicsStreams {
VPrimeStream,
XPrimeStream,
InverseMassStream,
LastStreamIndex
};
// track step (diagnostics)
int _internalStepCount;
BrookOpenMMFloat _stepSize;
// Particle stream dimensions
int _verletParticleStreamWidth;
int _verletParticleStreamHeight;
int _verletParticleStreamSize;
/*
* Update streams
*
* @return DefaultReturn
*
*/
int _updateVerletStreams( void );
// inverse masses
BrookOpenMMFloat* _inverseMasses;
// internal streams
BrookFloatStreamInternal* _verletStreams[LastStreamIndex];
/**
* Set stepSize
*
* @param stepSize
*
* @return DefaultReturn
*
*/
int _setStepSize( BrookOpenMMFloat stepSize );
/*
* Setup of stream dimensions
*
* @param particleStreamSize particle stream size
* @param particleStreamWidth particle stream width
*
* @return ErrorReturnValue if error, else DefaultReturnValue
*
* */
int _initializeStreamSizes( int particleStreamSize, int particleStreamWidth );
/**
* Initialize stream dimensions
*
* @param numberOfParticles number of particles
* @param platform platform
*
* @return ErrorReturnValue if error, else DefaultReturnValue
*
*/
int _initializeStreamSizes( int numberOfParticles, const Platform& platform );
/**
* Initialize stream dimensions and streams
*
* @param platform platform
*
* @return nonzero value if error
*
*/
int _initializeStreams( const Platform& platform );
/**
* Set masses
*
* @param masses particle masses
*
*/
int _setInverseMasses( const std::vector<double>& masses );
};
} // namespace OpenMM
#endif /* OPENMM_BROOK_VERLET_DYNAMCIS_H_ */
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/LangevinIntegrator.h"
#include "ReferencePlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
#include "BrookStreamImpl.h"
#include "OpenMMBrookInterface.h"
#include "kernels/kcommon.h"
#include <cmath>
#include <limits>
#include <sstream>
using namespace OpenMM;
using namespace std;
/**
* OpenMMBrookInterface constructor
*
*/
OpenMMBrookInterface::OpenMMBrookInterface( int streamWidth, int duplicationFactor ) : _particleStreamWidth(streamWidth){
// ---------------------------------------------------------------------------------------
// static const std::string methodName = "OpenMMBrookInterface::OpenMMBrookInterface";
// ---------------------------------------------------------------------------------------
_numberOfParticles = 0;
_triggerForceKernel = NULL;
_triggerEnergyKernel = NULL;
_positions = NULL;
_velocities = NULL;
_forces = NULL;
_log = NULL;
//_log = stderr;
_time = 0.0;
_particleStreamSize = -1;
for( int ii = 0; ii < LastBondForce; ii++ ){
_bondParameters[ii] = NULL;
}
if( duplicationFactor < 1 ){
duplicationFactor = 4;
}
_brookNonBonded.setDuplicationFactor( duplicationFactor );
_brookGbsa.setDuplicationFactor( duplicationFactor );
}
/**
* OpenMMBrookInterface destructor
*
*/
OpenMMBrookInterface::~OpenMMBrookInterface( ){
// ---------------------------------------------------------------------------------------
// static const std::string methodName = "OpenMMBrookInterface::OpenMMBrookInterface";
// ---------------------------------------------------------------------------------------
for( int ii = 0; ii < LastBondForce; ii++ ){
delete _bondParameters[ii];
}
}
/**
* Get number of particles
*
* @return number of particles
*
*/
int OpenMMBrookInterface::getNumberOfParticles( void ) const {
return _numberOfParticles;
}
/**
* Set number of particles
*
* @param numberOfParticles number of particles
*
*/
int OpenMMBrookInterface::setNumberOfParticles( int numberOfParticles ){
_numberOfParticles = numberOfParticles;
return BrookCommon::DefaultReturnValue;
}
/**
* Get the current time
*
* @return the current time
*
*/
double OpenMMBrookInterface::getTime( void ) const {
return _time;
}
/**
* Set the current time
*
* @param time the current time
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setTime( double time) {
_time = time;
return BrookCommon::DefaultReturnValue;
}
/**
* Get particle stream width
*
* @return particle stream width
*
*/
int OpenMMBrookInterface::getParticleStreamWidth( void ) const {
return _particleStreamWidth;
}
/**
* Get particle stream size
*
* @return particle stream size
*
*/
int OpenMMBrookInterface::getParticleStreamSize( void ) const {
if( _particleStreamSize < 0 && _numberOfParticles > 0 ){
OpenMMBrookInterface* localThis = const_cast<OpenMMBrookInterface* const>(this);
localThis->_particleStreamSize = BrookPlatform::getStreamSize( _numberOfParticles, _particleStreamWidth, NULL );
}
return _particleStreamSize;
}
/**
* Get log file reference
*
* @return log file reference
*
*/
FILE* OpenMMBrookInterface::getLog( void ) const {
return _log;
}
/**
* Set log file reference
*
* @param log file reference
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setLog( FILE* log ){
_log = log;
_brookBonded.setLog( log );
_brookNonBonded.setLog( log );
_brookGbsa.setLog( log );
return BrookCommon::DefaultReturnValue;
}
/**
* Get BrookBondParameters at specified index
*
* @param index
*
* @return BrookBondParameters* object
*
*/
BrookBondParameters* OpenMMBrookInterface::_getBondParameters( BondParameterIndices index ) const {
return _bondParameters[index];
}
/**
* Set BrookBondParameters at specified index
*
* @param index
* @param brookBondParameters brookBondParameters for BrookBondParameters
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::_setBondParameters( BondParameterIndices index, BrookBondParameters* brookBondParameters ){
if( brookBondParameters && brookBondParameters->getNumberOfBonds() > 0 ){
_brookBonded.setIsActive( 1 );
}
_bondParameters[index] = brookBondParameters;
if( !brookBondParameters || brookBondParameters->getNumberOfBonds() < 1 ){
int isActive = 0;
for( int ii = 0; ii < LastBondForce && !isActive; ii++ ){
isActive = (_bondParameters[ii] != NULL && brookBondParameters->getNumberOfBonds() > 0) ? 1 : 0;
}
_brookBonded.setIsActive( isActive );
}
return BrookCommon::DefaultReturnValue;
}
/**
* Get BrookBondParameters for harmonic bond force
*
* @return brookBondParameters for harmonic bond force
*
*/
BrookBondParameters* OpenMMBrookInterface::getHarmonicBondForceParameters( void ) const {
return _getBondParameters( HarmonicBondIndex );
}
/**
* Set BrookBondParameters for harmonic bond force
*
* @param brookBondParameters brookBondParameters for harmonic bond force
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setHarmonicBondForceParameters( BrookBondParameters* brookBondParameters ){
_brookBonded.setupCompleted( 0 );
return _setBondParameters( HarmonicBondIndex, brookBondParameters );
}
/**
* Get BrookBondParameters for harmonic angle force
*
* @return brookBondParameters for harmonic angle force
*
*/
BrookBondParameters* OpenMMBrookInterface::getHarmonicAngleForceParameters( void ) const {
return _getBondParameters( HarmonicAngleIndex );
}
/**
* Set BrookBondParameters for harmonic angle force
*
* @param brookBondParameters brookBondParameters for harmonic angle force
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setHarmonicAngleForceParameters( BrookBondParameters* brookBondParameters ){
_brookBonded.setupCompleted( 0 );
return _setBondParameters( HarmonicAngleIndex, brookBondParameters );
}
/**
* Get BrookBondParameters for periodic torsion force
*
* @return brookBondParameters for periodic torsion force
*
*/
BrookBondParameters* OpenMMBrookInterface::getPeriodicTorsionForceParameters( void ) const {
return _getBondParameters( PeriodicTorsionForceIndex );
}
/**
* Set BrookBondParameters for periodic torsion force
*
* @param brookBondParameters brookBondParameters for periodic torsion force
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setPeriodicTorsionForceParameters( BrookBondParameters* brookBondParameters ){
_brookBonded.setupCompleted( 0 );
return _setBondParameters( PeriodicTorsionForceIndex, brookBondParameters );
}
/**
* Get BrookBondParameters for rb torsion force
*
* @return brookBondParameters for rb torsion force
*
*/
BrookBondParameters* OpenMMBrookInterface::getRBTorsionForceParameters( void ) const {
return _getBondParameters( RbTorsionForceIndex );
}
/**
* Set BrookBondParameters for RB torsion force
*
* @param brookBondParameters brookBondParameters for RB torsion force
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setRBTorsionForceParameters( BrookBondParameters* brookBondParameters ){
_brookBonded.setupCompleted( 0 );
return _setBondParameters( RbTorsionForceIndex, brookBondParameters );
}
/**
* Get BrookBondParameters for LJ 14 force
*
* @return brookBondParameters for LJ 14 force
*
*/
BrookBondParameters* OpenMMBrookInterface::getNonBonded14ForceParameters( void ) const {
return _getBondParameters( LJ14Index );
}
/**
* Set BrookBondParameters for LJ 14 force
*
* @param brookBondParameters brookBondParameters for LJ 14 force
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setNonBonded14ForceParameters( BrookBondParameters* brookBondParameters ){
_brookBonded.setupCompleted( 0 );
return _setBondParameters( LJ14Index, brookBondParameters );
}
/**
* Get positions stream
*
* @return particle positions
*
*/
BrookStreamImpl* OpenMMBrookInterface::getParticlePositions( void ){
return _positions;
}
/**
* Set positions stream
*
* @param positions Brook stream containing particle positions
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setParticlePositions( BrookStreamImpl* positions ){
_positions = positions;
return DefaultReturnValue;
}
/**
* Get velocities stream
*
* @return particle velocities
*
*/
BrookStreamImpl* OpenMMBrookInterface::getParticleVelocities( void ){
return _velocities;
}
/**
* Set velocities stream
*
* @param velocities Brook stream containing particle velocities
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setParticleVelocities( BrookStreamImpl* velocities ){
_velocities = velocities;
return DefaultReturnValue;
}
/**
* Get forces stream
*
* @return ParticleForces
*
*/
BrookStreamImpl* OpenMMBrookInterface::getParticleForces( void ){
return _forces;
}
/**
* Set forces stream
*
* @param forces Brook stream containing particle forces
*
* @return DefaultReturnValue
*
*/
int OpenMMBrookInterface::setParticleForces( BrookStreamImpl* forces ){
_forces = forces;
return DefaultReturnValue;
}
/**
* Set trigger Force Kernel
*
* @param triggerForceKernel kernel to calculate force
*
*/
void OpenMMBrookInterface::setTriggerForceKernel( void* triggerForceKernel ){
_triggerForceKernel = triggerForceKernel;
}
/**
* Get trigger Force Kernel
*
* @return triggerForceKernel kernel to calculate force
*
*/
void* OpenMMBrookInterface::getTriggerForceKernel( void ) const {
return _triggerForceKernel;
}
/**
* Set trigger Energy Kernel
*
* @param triggerEnergyKernel kernel to calculate force
*
*/
void OpenMMBrookInterface::setTriggerEnergyKernel( void* triggerEnergyKernel ){
_triggerEnergyKernel = triggerEnergyKernel;
}
/**
* Get trigger Energy Kernel
*
* @return triggerEnergyKernel kernel to calculate force
*
*/
void* OpenMMBrookInterface::getTriggerEnergyKernel( void ) const {
return _triggerEnergyKernel;
}
/**
* Get Brook non bonded
*
* @return BrookNonBonded reference
*
*/
BrookNonBonded& OpenMMBrookInterface::getBrookNonBonded( void ){
return _brookNonBonded;
}
/**
* Get Brook GBSA
*
* @return BrookGbsa reference
*
*/
BrookGbsa& OpenMMBrookInterface::getBrookGbsa( void ){
return _brookGbsa;
}
/**
* Zero forces
*
* @param context context
*
*/
void OpenMMBrookInterface::zeroForces( ContextImpl& context ){
// ---------------------------------------------------------------------------------------
//static const std::string methodName = "OpenMMBrookInterface::zeroForces";
// ---------------------------------------------------------------------------------------
// zero forces
BrookStreamImpl* forces = getParticleForces();
kzerof3( forces->getBrookStream() );
// ---------------------------------------------------------------------------------------
}
/**
* Compute forces
*
* @param context context
*
*/
void OpenMMBrookInterface::computeForces( ContextImpl& context ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "OpenMMBrookInterface::executeForces";
static int printOn = 0;
static const int MaxErrorMessages = 2;
static int ErrorMessages = 0;
FILE* log;
// ---------------------------------------------------------------------------------------
//setLog( stderr );
if( printOn && getLog() ){
log = getLog();
} else {
printOn = 0;
}
BrookStreamImpl* positions = getParticlePositions();
BrookStreamImpl* forces = getParticleForces();
// info
if( printOn > 0 ){
//if( 1 ){
printForcesToFile( context );
}
// nonbonded forces
if( _brookNonBonded.isActive() ){
_brookNonBonded.computeForces( *positions, *forces );
}
// ---------------------------------------------------------------------------------------
// bonded forces
if( printOn ){
(void) fprintf( log, "%s done nonbonded: bonded=%d completed=%d\n", methodName.c_str(),
_brookBonded.isActive(), _brookBonded.isSetupCompleted() );
(void) fflush( log );
}
if( _brookBonded.isActive() ){
// perform setup first time through
if( _brookBonded.isSetupCompleted() == 0 ){
_brookBonded.setup( getNumberOfParticles(), getHarmonicBondForceParameters(), getHarmonicAngleForceParameters(),
getPeriodicTorsionForceParameters(), getRBTorsionForceParameters(),
getNonBonded14ForceParameters(),
getParticleStreamWidth(), getParticleStreamSize() );
}
_brookBonded.computeForces( *positions, *forces );
// diagnostics
if( printOn ){
static int step = 0;
static const int stopStep = 10;
(void) fprintf( log, "%s done bonded computeForces\n", methodName.c_str() );
(void) fflush( log );
/*
(void) fprintf( getLog(), "\nFinal NB & bonded forces" );
BrookStreamInternal* brookStreamInternalF = forceStream.getBrookStreamInternal();
brookStreamInternalF->printToFile( getLog() );
void* dataV = brookStreamInternalF->getData(1);
float* data = (float*) dataV;
(void) fprintf( getLog(), "\nFinal NB & bonded forces RAW\n" );
for( int ii = 0; ii < _brookNonBonded->getNumberOfParticles()*3; ii += 3 ){
(void) fprintf( getLog(), "%d [%.6e %.6e %.6e]\n", ii, data[ii], data[ii+1], data[ii+2] );
}
*/
if( step++ >= stopStep ){
exit(0);
}
}
}
// GBSA OBC forces
if( _brookGbsa.isActive() ){
_brookGbsa.computeForces( *positions, *forces );
}
// ---------------------------------------------------------------------------------------
}
/**
* Print forces to file
*
* @param context context
*
*/
void OpenMMBrookInterface::printForcesToFile( ContextImpl& context ){
// ---------------------------------------------------------------------------------------
static const std::string methodName = "OpenMMBrookInterface::printForcesToFile";
float zero = 0.0f;
static int step = 0;
// ---------------------------------------------------------------------------------------
// first step only?
if( step > 1 ){
return;
}
std::stringstream fileNameBaseS;
//fileNameBase << "Brook_" << context.getTime() << "_";
fileNameBaseS << "Brook_" << step++ << "_";
std::string fileNameBase = fileNameBaseS.str();
// create vector of streams for output
BrookStreamImpl* positions = getParticlePositions();
BrookStreamImpl* forces = getParticleForces();
std::vector<BrookStreamInternal*> streams;
streams.push_back( positions->getBrookStreamInternal() );
streams.push_back( forces->getBrookStreamInternal() );
// ---------------------------------------------------------------------------------------
// nonbonded forces
if( _brookNonBonded.isActive() ){
forces->fillWithValue( &zero );
_brookNonBonded.computeForces( *positions, *forces );
std::string fileName = fileNameBase + "NonBonded.txt";
BrookStreamInternal::printStreamsToFile( fileName, streams );
}
// ---------------------------------------------------------------------------------------
// bonded forces
if( _brookBonded.isActive() ){
// perform setup first time through
if( _brookBonded.isSetupCompleted() == 0 ){
_brookBonded.setup( getNumberOfParticles(), getHarmonicBondForceParameters(), getHarmonicAngleForceParameters(),
getPeriodicTorsionForceParameters(), getRBTorsionForceParameters(),
getNonBonded14ForceParameters(),
getParticleStreamWidth(), getParticleStreamSize() );
}
forces->fillWithValue( &zero );
_brookBonded.computeForces( *positions, *forces );
std::string fileName = fileNameBase + "Bonded.txt";
BrookStreamInternal::printStreamsToFile( fileName, streams );
}
// ---------------------------------------------------------------------------------------
// GBSA OBC forces
if( _brookGbsa.isActive() ){
forces->fillWithValue( &zero );
_brookGbsa.computeForces( *positions, *forces );
std::string fileName = fileNameBase + "Obc.txt";
BrookStreamInternal::printStreamsToFile( fileName, streams );
}
// ---------------------------------------------------------------------------------------
// all forces
if( 1 ){
forces->fillWithValue( &zero );
if( _brookNonBonded.isActive() ){
_brookNonBonded.computeForces( *positions, *forces );
}
if( _brookBonded.isActive() ){
_brookBonded.computeForces( *positions, *forces );
}
if( _brookGbsa.isActive() ){
_brookGbsa.computeForces( *positions, *forces );
}
std::string fileName = fileNameBase + "AllF.txt";
BrookStreamInternal::printStreamsToFile( fileName, streams );
}
forces->fillWithValue( &zero );
// ---------------------------------------------------------------------------------------
}
/**
* Compute energy
*
* @param context context
* @param system system reference
*
*/
double OpenMMBrookInterface::computeEnergy( ContextImpl& context, System& system ){
// ---------------------------------------------------------------------------------------
//static const std::string methodName = "OpenMMBrookInterface::computeEnergy";
// ---------------------------------------------------------------------------------------
// We don't currently have GPU kernels to calculate energy, so instead we have the reference
// platform do it. This is VERY slow.
LangevinIntegrator integrator(0.0, 1.0, 0.0);
ReferencePlatform platform;
Context refContext( system, integrator, platform );
const Stream& positions = context.getPositions();
double* posData = new double[positions.getSize()*3];
positions.saveToArray(posData);
vector<Vec3> pos(positions.getSize());
for( unsigned int ii = 0; ii < pos.size(); ii++ ){
pos[ii] = Vec3(posData[3*ii], posData[3*ii+1], posData[3*ii+2]);
}
delete[] posData;
refContext.setPositions(pos);
return refContext.getState(State::Energy).getPotentialEnergy();
}
#ifndef OPENMM_BROOK_INTERFACE_H_
#define OPENMM_BROOK_INTERFACE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/kernels.h"
#include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "BrookBondParameters.h"
#include "BrookBonded.h"
#include "BrookNonBonded.h"
#include "BrookGbsa.h"
#include "openmm/NonbondedForce.h"
#include "openmm/Context.h"
#include "openmm/System.h"
namespace OpenMM {
/**
* OpenMM-Brook interface methods
*/
class OpenMMBrookInterface {
public:
OpenMMBrookInterface( int streamWidth, int duplicationFactor );
~OpenMMBrookInterface();
/**
* Get reference Context
*
* @param numberOfParticles number of particles
*
* @return Context
*
*/
Context* getReferenceContext( int numberOfParticles );
/**
* Set log file reference
*
* @param log file reference
*
* @return DefaultReturnValue
*
*/
int setLog( FILE* log );
/*
* Get contents of object
*
* @param level of dump
*
* @return string containing contents
*
* */
std::string getContents( int level ) const;
/**
* Get number of particles
*
* @return number of particles
*
*/
int getNumberOfParticles( void ) const;
/**
* Set number of particles
*
* @param numberOfParticles number of particles
*
* @return DefaultReturnValue
*
*/
int setNumberOfParticles( int numberOfParticles );
/**
* Get the current time
*
* @return the current time
*
*/
double getTime( void ) const;
/**
* Set the current time
*
* @param time the current time
*
* @return DefaultReturnValue
*
*/
int setTime( double time);
/**
* Get particle stream width
*
* @return particle stream width
*
*/
int getParticleStreamWidth( void ) const;
/**
* Get particle stream size
*
* @return particle stream size
*
*/
int getParticleStreamSize( void ) const;
/**
* Get log file reference
*
* @return log file reference
*
*/
FILE* getLog( void ) const;
/**
* Zero forces
*
* @param context ContextImpl context
*
*/
void zeroForces( ContextImpl& context );
/**
* Compute forces
*
* @param context ContextImpl context
*
*/
void computeForces( ContextImpl& context );
/**
* Compute energy
*
* @param context ContextImpl context
* @param system system reference
*
* @return potential energy
*
*/
double computeEnergy( ContextImpl& context, System& system );
/**
* Set trigger Force Kernel
*
* @param triggerForceKernel kernel to calculate force
*
*/
void setTriggerForceKernel( void* triggerForceKernel );
/**
* Get trigger Force Kernel
*
* @return triggerForceKernel kernel to calculate force
*
*/
void* getTriggerForceKernel( void ) const;
/**
* Set trigger Energy Kernel
*
* @param triggerEnergyKernel kernel to calculate force
*
*/
void setTriggerEnergyKernel( void* triggerEnergyKernel );
/**
* Get trigger Energy Kernel
*
* @return triggerEnergyKernel kernel to calculate force
*
*/
void* getTriggerEnergyKernel( void ) const;
/**
* Get BrookNonBonded reference
*
* @return BrookNonBonded reference
*
*/
BrookNonBonded& getBrookNonBonded( void );
/**
* Get BrookGbsa reference
*
* @return BrookGbsa reference
*
*/
BrookGbsa& getBrookGbsa( void );
/**
* Get BrookBondParameters for harmonic bond force
*
* @return brookBondParameters for BrookBondParameters for harmonic bond force
*
*/
BrookBondParameters* getHarmonicBondForceParameters( void ) const;
/**
* Set BrookBondParameters for harmonic bond force
*
* @param brookBondParameters BrookBondParameters for harmonic bond force
*
* @return DefaultReturnValue
*
*/
int setHarmonicBondForceParameters( BrookBondParameters* brookBondParameters );
/**
* Get BrookBondParameters for harmonic angle force
*
* @return brookBondParameters for BrookBondParameters for harmonic angle force
*
*/
BrookBondParameters* getHarmonicAngleForceParameters( void ) const;
/**
* Set BrookBondParameters for harmonic angle force
*
* @param brookBondParameters brookBondParameters for BrookBondParameters for harmonic angle force
*
* @return DefaultReturnValue
*
*/
int setHarmonicAngleForceParameters( BrookBondParameters* brookBondParameters );
/**
* Get BrookBondParameters for periodic torsion force
*
* @return brookBondParameters for BrookBondParameters for periodic torsion force
*
*/
BrookBondParameters* getPeriodicTorsionForceParameters( void ) const;
/**
* Set BrookBondParameters for periodic torsion force
*
* @param brookBondParameters for periodic torsion force
*
* @return DefaultReturnValue
*
*/
int setPeriodicTorsionForceParameters( BrookBondParameters* brookBondParameters );
/**
* Get BrookBondParameters for RB torsion force
*
* @return brookBondParameters for BrookBondParameters for RB torsion force
*
*/
BrookBondParameters* getRBTorsionForceParameters( void ) const;
/**
* Set BrookBondParameters for RB torsion force
*
* @param brookBondParameters brookBondParameters for RB torsion force
*
* @return DefaultReturnValue
*
*/
int setRBTorsionForceParameters( BrookBondParameters* brookBondParameters );
/**
* Get BrookBondParameters for LJ 14 forces
*
* @return brookBondParameters for BrookBondParameters for LJ 14 forces
*
*/
BrookBondParameters* getNonBonded14ForceParameters( void ) const;
/**
* Set BrookBondParameters for LJ 14 force
*
* @param brookBondParameters brookBondParameters for LJ 14 force
*
* @return DefaultReturnValue
*
*/
int setNonBonded14ForceParameters( BrookBondParameters* brookBondParameters );
/**
* Get positions stream
*
* @return particle positions
*
*/
BrookStreamImpl* getParticlePositions( void );
/**
* Set positions stream
*
* @param positions Brook stream containing particle positions
*
* @return DefaultReturnValue
*
*/
int setParticlePositions( BrookStreamImpl* positions );
/**
* Get velocities stream
*
* @return particle velocities
*
*/
BrookStreamImpl* getParticleVelocities( void );
/**
* Set velocities stream
*
* @param velocities Brook stream containing particle velocities
*
* @return DefaultReturnValue
*
*/
int setParticleVelocities( BrookStreamImpl* velocities );
/**
* Get forces stream
*
* @return ParticleForces
*
*/
BrookStreamImpl* getParticleForces( void );
/**
* Set forces stream
*
* @param forces Brook stream containing particle forces
*
* @return DefaultReturnValue
*
*/
int setParticleForces( BrookStreamImpl* forces );
/**
* Print forces to file
*
* @param context context
*
*/
void printForcesToFile( ContextImpl& context );
private:
static const int DefaultReturnValue = 0;
static const int ErrorReturnValue = -1;
enum BondParameterIndices { HarmonicBondIndex, HarmonicAngleIndex, PeriodicTorsionForceIndex, RbTorsionForceIndex, LJ14Index, LastBondForce };
// log file reference
FILE* _log;
int _particleStreamWidth;
int _particleStreamSize;
// number of particles
int _numberOfParticles;
// Brook bonded, nonbonded, Gbsa
BrookBonded _brookBonded;
BrookNonBonded _brookNonBonded;
BrookGbsa _brookGbsa;
void* _triggerForceKernel;
void* _triggerEnergyKernel;
BrookBondParameters* _bondParameters[LastBondForce];
// context-related fields
BrookStreamImpl* _positions;
BrookStreamImpl* _velocities;
BrookStreamImpl* _forces;
double _time;
/**
* Set BrookBondParameters
*
* @param index
* @param brookBondParameters brookBondParameters for BrookBondParameters
*
* @return DefaultReturnValue
*
*/
int _setBondParameters( BondParameterIndices index, BrookBondParameters* brookBondParameters );
/**
* Get BrookBondParameters at specified index
*
* @param index
*
* @return BrookBondParameters* object
*
*/
BrookBondParameters* _getBondParameters( BondParameterIndices index ) const;
};
} // namespace OpenMM
#endif /* OPENMM_BROOK_INTERFACE_H_ */
#ifndef __INVMAP_H__
#define __INVMAP_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/*
* For each atom, calculates the positions at which it's
* forces are to be picked up from and stores the position
* in the appropriate index.
*
* Input: number of dihedrals, the atom indices, and a flag indicating
* whether we're doing i(0), j(1), k(2) or l(3)
* Output: an array of counts per atom
* arrays of inversemaps
* nimaps - the number of invmaps actually used.
*
* */
int
gpuCalcInvMap(
int posflag, //0-niatoms-1
int niatoms, //3 for angles, 4 for torsions, impropers
int nints, //number of interactions
int natoms, //number of atoms
int *atoms, //gromacs interaction list
int nmaps, //maximum number of inverse maps
int counts[], //output counts of how many places each atom occurs
float4 *invmaps[], //output array of nmaps inverse maps
int *nimaps //output max number of inverse maps actually used
);
void
gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[], FILE* logFile );
/* We are still plagued by kernel call overheads. This is for a big fat
* merged inverse gather kernel:
* Since we have 32 bit floats, we have 23 bits of mantissa or the largest
* integer we can represent is 2^23. So it should be quite safe to add
* 100000 * n to the index where n is the stream in which we should do the
* lookup. This assumes that nints < 100000, preferably nints << 100000
* which should always be true
* */
int
gpuCalcInvMap_merged(
int nints, //number of interactions
int natoms, //number of atoms
int *atoms, //ijkl,ijkl,ijkl...
int nmaps, //maximum number of inverse maps
int counts[], //output counts of how many places each atom occurs
float4 *invmaps[], //output array of nmaps inverse maps
int *nimaps //output max number of inverse maps actually used
);
/* Repacks the invmap streams for more efficient access in the
* merged inverse gather kernel
*
* buf should be nimaps * natoms large.
* */
int
gpuRepackInvMap_merged( int natoms, int nmaps, int *counts,
float4 *invmaps[], float4 *buf );
#endif //__INVMAP_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**---------------------------------------------------------------------------------------
This kernel calculates the linear momentum
@param mass stream of masses
@param velocities stream of velocities
@param linearMomentum output stream of linear momentum
--------------------------------------------------------------------------------------- */
kernel void kCalculateLinearMomentum( float mass<>, float3 velocities<>, out float3 linearMomentum<> ){
linearMomentum = mass*velocities;
}
/**---------------------------------------------------------------------------------------
This kernel scales the linear momentum
@param scale scale factor
@param linearMomentumIn input linearMomentum
@param linearMomentumOut output linearMomentum
--------------------------------------------------------------------------------------- */
kernel void kScale( float scale, float3 linearMomentumIn<>, out float3 linearMomentumOut<> ){
linearMomentumOut = scale*linearMomentumIn;
}
/**---------------------------------------------------------------------------------------
This kernel calculates the total linear momentum via a reduction
@param momentum momentum
@param linearMomentum total momentum
Note: treating the output linearMomentum as non-stream did not compile, even though
I found examples in Brook testing code that had that construct -- MSF 3/6/08
--------------------------------------------------------------------------------------- */
reduce void kReduceLinearMomentum( float3 momentum<>, reduce float3 linearMomentum<> ){
linearMomentum += momentum;
}
/**---------------------------------------------------------------------------------------
This kernel calculates the total linear momentum via a reduction
@param atomStrWidth atom stream width
@param numberOfAtoms number of atoms
@param momentum momentum
@param linearMomentum total momentum
--------------------------------------------------------------------------------------- */
kernel void kSumLinearMomentum( float atomStrWidth, float numberOfAtoms, float scale, float3 momentum[][],
out float3 linearMomentum<> ){
float atomCount;
float2 atomIndex;
atomCount = 0.0f;
linearMomentum = float3( 0.0f, 0.0f, 0.0f );
atomIndex.y = 0.0f;
atomIndex.x = 0.0f;
while( atomCount < numberOfAtoms ){
linearMomentum += momentum[atomIndex];
atomIndex.x += 1.0f;
if( atomIndex.x == atomStrWidth ){
atomIndex.x = 0.0f;
atomIndex.y += 1.0f;
}
atomCount += 1.0f;
}
linearMomentum *= scale;
}
/**---------------------------------------------------------------------------------------
This kernel calculates the total linear momentum via a reduction
@param momentum momentum
@param linearMomentum total momentum
Note: treating the output linearMomentum as non-stream did not compile, even though
I found examples in Brook testing code that had that construct -- MSF 3/6/08
--------------------------------------------------------------------------------------- */
reduce void kSum( float3 array<>, reduce float3 sum<> ){
//sum += array;
//sum += array.x + array.y + array.z;
//sum += float3( 1.0, 1.0, 1.0 );
/*
sum.x += 1.0;
sum.y += 1.0;
sum.z += 1.0;
*/
sum += array;
//sum += 1.0;
}
/**---------------------------------------------------------------------------------------
This kernel subtracts the (total linear momentum)/totalMass from the velocities
@param linearMomentum normalized linear momentum
@param velocities stream of velocities
@param velocitiesOut output stream of velocities
--------------------------------------------------------------------------------------- */
kernel void kRemoveLinearMomentum( float3 linearMomentum[], float3 velocities<>, out float3 velocitiesOut<> ){
velocitiesOut = velocities - linearMomentum[0];
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
void kCalculateLinearMomentum( ::brook::stream mass, ::brook::stream velocities, ::brook::stream linearMomentum );
void kReduceLinearMomentum( ::brook::stream momentum, ::brook::stream linearMomentum );
//void kSumLinearMomentum( ::brook::stream momentum, float3* linearMomentum );
void kScale( float scale, ::brook::stream linearMomentumIn, ::brook::stream linearMomentumOut );
void kRemoveLinearMomentum( ::brook::stream linearMomentum, ::brook::stream velocitiesIn, ::brook::stream velocitiesOut );
void kSum( ::brook::stream array, ::brook::stream sum );
/**---------------------------------------------------------------------------------------
This kernel calculates the total linear momentum via a reduction
@param atomStrWidth atom stream width
@param numberOfAtoms number of atoms
@param scale sum of inverse masses
@param momentum momentum
@param linearMomentum total momentum
--------------------------------------------------------------------------------------- */
void kSumLinearMomentum( float atomStrWidth, float numberOfAtoms, float scale, ::brook::stream momentum, ::brook::stream linearMomentum );
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
kernel void kgetxyz( float4 instr<>, out float3 outstr<> ){
outstr = instr.xyz;
}
//Zeroes out a stream
kernel void kzerof3( out float3 outstr<> ){
outstr = float3( 0.0f, 0.0f, 0.0f );
}
//Zeros out a stream
kernel void kzerof4( out float4 outstr<> ){
outstr = float4( 0.0f, 0.0f, 0.0f, 0.0f );
}
kernel void ksetf4( float4 val, out float4 outstr<> ){
outstr = val;
}
kernel void ksetStr3( float3 instr<>, out float3 outstr<> ){
outstr = instr;
}
kernel void kadd3( float3 in1<>, float3 in2<>, out float3 outstr<> ){
outstr = in1 + in2;
}
#ifndef __KCOMMON_H__
#define __KCOMMON_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
void kgetxyz(::brook::stream instr, ::brook::stream outstr);
void kzerof3(::brook::stream outstr);
void kzerof4(::brook::stream outstr);
void kzerof4(::brook::stream outstr);
void ksetf4(const float4 val, ::brook::stream outstr);
void kadd3( ::brook::stream instr1, ::brook::stream instr2, ::brook::stream outstr );
void ksetStr3( ::brook::stream instr, ::brook::stream outstr );
#endif // __KCOMMON_H__
#ifndef __KFORCE_H__
#define __KFORCE_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
//Define a generic force kernel prototype
//To make switching easier to look at
//This should match the kernels from kforce.br
typedef void (*gpuNBForceFunction)(
const float natoms,
const float dupfac,
const float strheight,
const float strwidth,
const float jstrwidth,
const float fstrwidth,
const float epsfac,
const float4 params,
::brook::stream posq,
::brook::stream isigeps,
::brook::stream sigma,
::brook::stream epsilon,
::brook::stream excl,
::brook::stream outforce1,
::brook::stream outforce2
);
void knbforce_CDLJ (const float natoms,
const float dupfac,
const float strheight,
const float strwidth,
const float jstrwidth,
const float fstrwidth,
const float epsfac,
::brook::stream posq,
::brook::stream isigeps,
::brook::stream sigma,
::brook::stream epsilon,
::brook::stream excl,
::brook::stream outforce1,
::brook::stream outforce2);
void knbforce_CDLJ4(
const float natoms,
const float nAtomsCeiling,
const float dupfac,
const float strheight,
const float strwidth,
const float jstrwidth,
const float fstrwidth,
const float epsfac,
::brook::stream posq,
::brook::stream charge,
::brook::stream isigeps,
::brook::stream sigma,
::brook::stream epsilon,
::brook::stream excl,
::brook::stream outforce1,
::brook::stream outforce2,
::brook::stream outforce3,
::brook::stream outforce4);
void kmerge_partial_forces (const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
::brook::stream pforce1,
::brook::stream pforce2,
::brook::stream force);
void kMergeFloat3_4(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
::brook::stream pforce1,
::brook::stream pforce2,
::brook::stream pforce3,
::brook::stream pforce4,
::brook::stream force );
void kMergeFloat3_4_nobranch(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
::brook::stream pforce1,
::brook::stream pforce2,
::brook::stream pforce3,
::brook::stream pforce4,
::brook::stream force );
typedef void ( *gpuNBForceFunction14 )
(const float natoms,
const float strwidth,
const float epsfac,
const float4 params,
::brook::stream atoms,
::brook::stream posq,
::brook::stream sigeps,
::brook::stream fi,
::brook::stream fj);
void kforce14_CDLJ
(const float natoms,
const float strwidth,
const float epsfac,
const float4 params,
::brook::stream atoms,
::brook::stream posq,
::brook::stream sigeps,
::brook::stream fi,
::brook::stream fj);
void kforce14_LDLJ
(const float natoms,
const float strwidth,
const float epsfac,
const float4 params,
::brook::stream atoms,
::brook::stream posq,
::brook::stream sigeps,
::brook::stream fi,
::brook::stream fj);
void kforce14_SFDLJ
(const float natoms,
const float strwidth,
const float epsfac,
const float4 params,
::brook::stream atoms,
::brook::stream posq,
::brook::stream sigeps,
::brook::stream fi,
::brook::stream fj);
void kforce14_SHEFFIELD
(const float natoms,
const float strwidth,
const float epsfac,
const float4 params,
::brook::stream atoms,
::brook::stream posq,
::brook::stream sigeps,
::brook::stream fi,
::brook::stream fj);
typedef void (*gpuBondedFunction)(
const float epsfac,
const float xstrwidth,
const float4 params,
::brook::stream posq,
::brook::stream atoms,
::brook::stream parm0,
::brook::stream parm1,
::brook::stream parm2,
::brook::stream parm3,
::brook::stream parm4,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl);
void kbonded_CDLJ (const float epsfac,
const float xstrwidth,
::brook::stream posq,
::brook::stream charge,
::brook::stream atoms,
::brook::stream parm0,
::brook::stream parm1,
::brook::stream parm2,
::brook::stream parm3,
::brook::stream parm4,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl);
void kbonded_CDLJDebug (const float epsfac,
const float xstrwidth,
const float4 params,
::brook::stream posq,
::brook::stream atoms,
::brook::stream parm0,
::brook::stream parm1,
::brook::stream parm2,
::brook::stream parm3,
::brook::stream parm4,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl);
void kbonded_LDLJ (const float epsfac,
const float xstrwidth,
const float4 params,
::brook::stream posq,
::brook::stream atoms,
::brook::stream parm0,
::brook::stream parm1,
::brook::stream parm2,
::brook::stream parm3,
::brook::stream parm4,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl);
void kbonded_SFDLJ (const float epsfac,
const float xstrwidth,
const float4 params,
::brook::stream posq,
::brook::stream atoms,
::brook::stream parm0,
::brook::stream parm1,
::brook::stream parm2,
::brook::stream parm3,
::brook::stream parm4,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl);
void kbonded_SHEFFIELD (const float epsfac,
const float xstrwidth,
const float4 params,
::brook::stream posq,
::brook::stream atoms,
::brook::stream parm0,
::brook::stream parm1,
::brook::stream parm2,
::brook::stream parm3,
::brook::stream parm4,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl);
void kAddForces3_4(
const float conversion,
::brook::stream force1,
::brook::stream force2,
::brook::stream outForce );
#endif //__KFORCE_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
kernel float4 scalar_force_CDLJ( float4 qq, float epsfac, float4 sig, float4 eps, float4 r2 ) {
float4 invr, invrsig2, invrsig6;
float4 f;
invr = rsqrt( r2 );
invrsig2 = invr * sig;
invrsig2 = invrsig2 * invrsig2;
invrsig6 = invrsig2 * invrsig2 * invrsig2;
f = eps * ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
f += epsfac*qq*invr;
f *= invr*invr;
return f;
}
kernel float scalar_force_single_CDLJ( float qq, float epsfac, float sig, float eps, float r2 ) {
float invr, invrsig2, invrsig6;
float f;
invr = rsqrt( r2 );
invrsig2 = invr * sig;
invrsig2 = invrsig2 * invrsig2;
invrsig6 = invrsig2 * invrsig2 * invrsig2;
f = eps * ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
f += epsfac*qq*invr;
f *= invr*invr;
return f;
}
kernel float4 get_r2_CDLJ( float3 d1, float3 d2, float3 d3, float3 d4 ) {
float4 r2;
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
return r2;
}
kernel void knbforce_CDLJ (
float natoms,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float4 posq[][],
float4 isigeps<>,
float4 sigma[][],
float4 epsilon[][],
float2 excl[][],
out float3 outforce1<>,
out float3 outforce2<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float2 exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
outforce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
a_iatom = 2.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
linind = fmod( a_iatom, natoms );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
ipos1 = posq[ iatom ].xyz;
qi1 = posq[ iatom ].w;
iatom.x += 1;
ipos2 = posq[ iatom ].xyz;
qi2 = posq[ iatom ].w;
breakflag = 1.0f;
jatom.y = 0.0f;
which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
jend = 1 + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1 - 0.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind/2.0f;
exclind.y = jstart * strwidth/4.0f;
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
exclusions = excl[ exclind ];
exclmask = fmod( exclusions.x, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos1 = posq[ jatom ].xyz;
qj.x = posq[ jatom ].w;
jatom.x += 1.0f;
jpos2 = posq[ jatom ].xyz;
qj.y = posq[ jatom ].w;
jatom.x += 1.0f;
jpos3 = posq[ jatom ].xyz;
qj.z = posq[ jatom ].w;
jatom.x += 1.0f;
jpos4 = posq[ jatom ].xyz;
qj.w = posq[ jatom ].w;
jatom.x += 1.0f;
sig = isigeps.x + jsig;
eps = isigeps.y * jeps;
qq = qi1 * qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
outforce1 += fs.x * d1;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
/*
outforce1.x += exclmask.x;
outforce1.x += exclmask.y;
outforce1.x += exclmask.z;
outforce1.x += exclmask.w;
if( exclmask.x > 0.5f ){
outforce1.x += 1.0f;
}
if( exclmask.y > 0.5f ){
outforce1.x += 1.0f;
}
if( exclmask.z > 0.5f ){
outforce1.x += 1.0f;
}
if( exclmask.w > 0.5f ){
outforce1.x += 1.0f;
}
*/
exclmask = fmod( exclusions.y, exclconst );
sig = isigeps.z + jsig;
eps = isigeps.w * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
/*
outforce2.x += exclmask.x;
outforce2.x += exclmask.y;
outforce2.x += exclmask.z;
outforce2.x += exclmask.w;
if( exclmask.x > 0.5f ){
outforce2.x += 1.0f;
}
if( exclmask.y > 0.5f ){
outforce2.x += 1.0f;
}
if( exclmask.z > 0.5f ){
outforce2.x += 1.0f;
}
if( exclmask.w > 0.5f ){
outforce2.x += 1.0f;
}
*/
exclind.y += 1.0f;
if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
breakflag = -1.0f;
}
jatom.y += 1.0f;
}
}
kernel void knbforce_CDLJ4(
float natoms,
float nAtomsCeiling,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float3 posq[][],
float charge[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<>,
out float3 outforce3<>,
out float3 outforce4<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2, ipos3, ipos4;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2, qi3, qi4;
float isig1, isig2, isig3, isig4;
float ieps1, ieps2, ieps3, ieps4;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
float maskMultiplier;
maskMultiplier = 1.0e+06f;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float3( 0.0f, 0.0f, 0.0f );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
// linind = fmod( a_iatom, natoms );
linind = fmod( a_iatom, nAtomsCeiling );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
/* --------------------------------------------------------------------- */
ipos1 = posq[ iatom ];
qi1 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos2 = posq[ iatom ];
qi2 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos3 = posq[ iatom ];
qi3 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig3 = jstrindex.x;
ieps3 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos4 = posq[ iatom ];
qi4 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
/* --------------------------------------------------------------------- */
breakflag = 1.0f;
jatom.y = 0.0f;
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind;
exclind.y = jstart * strwidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
/* --------------------------------------------------------------------- */
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos1 = posq[ jatom ];
qj.x = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos2 = posq[ jatom ];
qj.y = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos3 = posq[ jatom ];
qj.z = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos4 = posq[ jatom ];
qj.w = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
/* --------------------------------------------------------------------- */
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
outforce1 += fs.x * d1;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig3 + jsig;
eps = ieps3 * jeps;
qq = qi3 * qj;
d1 = ipos3 - jpos1;
d2 = ipos3 - jpos2;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
outforce3 += fs.x * d1;
outforce3 += fs.y * d2;
outforce3 += fs.z * d3;
outforce3 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig4 + jsig;
eps = ieps4 * jeps;
qq = qi4 * qj;
d1 = ipos4 - jpos1;
d2 = ipos4 - jpos2;
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
outforce4 += fs.x * d1;
outforce4 += fs.y * d2;
outforce4 += fs.z * d3;
outforce4 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
}
//jatom.y += 1.0f;
jatom.y = floor( jatom.y + 1.2f );
}
}
kernel void kforce14_CDLJ(
float natoms,
float strwidth,
float epsfac,
float4 params,
float2 atoms<>,
float4 posq[][],
float2 sigeps<>,
out float3 fi<>,
out float3 fj<>
)
{
float2 ai, aj;
float3 d1;
float r2;
float qq, fs;
//ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
//aj.y = floor( atoms.y / strwidth );
aj.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj.x = atoms.y - aj.y * strwidth;
d1 = posq[ai].xyz - posq[aj].xyz;
qq = posq[ai].w * posq[aj].w;
r2 = dot( d1, d1 );
fs = scalar_force_single_CDLJ( qq, epsfac, sigeps.x, sigeps.y, r2 );
fi = fs.x * d1;
fj = -fi;
}
kernel void kAcosV( float x<>, out float acosVal<> ){
float x2, x3, x5, x7, x9, x11, x13, x15, x17, x19;
float pi2 = 1.57079632679489661923f;
float coefficient3 = 1.0f/6.0f;
float coefficient5 = 3.0f/40.0f;
float coefficient7 = 5.0f/112.0f;
float coefficient9 = 35.0f/1152.0f;
float coefficient11 = 63.0f/2816.0f;
float coefficient13 = 231.0f/13312.0f;
float coefficient15 = 143.0f/10240.0f;
float coefficient17 = 6435.0f/557056.0f;
float coefficient19 = 12155.0f/1245184.0f;
// ---------------------------------------------------------------------------------------
x2 = x*x;
x3 = x*x2;
x5 = x3*x2;
x7 = x5*x2;
x9 = x7*x2;
x11 = x9*x2;
x13 = x11*x2;
x15 = x13*x2;
x17 = x15*x2;
x19 = x17*x2;
acosVal = x19*coefficient19;
acosVal += x17*coefficient17;
acosVal += x15*coefficient15;
acosVal += x13*coefficient13;
acosVal += x11*coefficient11;
acosVal += x9*coefficient9;
acosVal += x7*coefficient7;
acosVal += x5*coefficient5;
acosVal += x3*coefficient3 + x;
acosVal = pi2 - acosVal;
}
kernel void kAcosTanV( float xIn<>, out float acosVal<> ){
const float c1 = 48.70107004404898384f;
const float c2 = 49.5326263772254345f;
const float c3 = 9.40604244231624f;
const float c4 = 48.70107004404996166f;
const float c5 = 65.7663163908956299f;
const float c6 = 21.587934067020262f;
const float tantwelfthpi = 0.26794919243112270647255365849413f;
const float tansixthpi = 0.57735026918962576450914878050196f;
const float sixthpi = 0.52359877559829887307710723054658f;
const float halfpi = 1.5707963267948966192313216916398f;
float x2, y, x;
float sign, complement, region;
// ---------------------------------------------------------------------------------------
complement = 0.0f;
region = 0.0f;
x = xIn;
y = rsqrt( (1.0f + x)*(1.0f - x) );
x *= y;
if( x < 0.0f ){
x = -x;
sign = -1.0f;
} else {
sign = 1.0f;
}
if( x > 1.0f ){
x = 1.0f/x;
complement = 1.0f;
} else {
complement = 0.0f;
}
if( x > tantwelfthpi ){
x = (x-tansixthpi)/(1.0f + tansixthpi*x);
region = sixthpi;
} else {
region = 0.0f;
}
x2 = x*x;
y = (x*(c1 + x2*(c2 + x2*c3))/(c4 + x2*(c5 + x2*(c6 + x2))));
y += region;
y = (complement > 0.0f) ? (halfpi- y) : y;
acosVal = halfpi - sign*y;
}
kernel void kAcos( float xIn, out float acosVal<> ){
const float c1 = 48.70107004404898384f;
const float c2 = 49.5326263772254345f;
const float c3 = 9.40604244231624f;
const float c4 = 48.70107004404996166f;
const float c5 = 65.7663163908956299f;
const float c6 = 21.587934067020262f;
const float tantwelfthpi = 0.26794919243112270647255365849413f;
const float tansixthpi = 0.57735026918962576450914878050196f;
const float sixthpi = 0.52359877559829887307710723054658f;
const float halfpi = 1.5707963267948966192313216916398f;
float x2, y, x;
float sign, complement, region;
// ---------------------------------------------------------------------------------------
// acos(x) = atan(x/sqrt(1-x**2))
x = xIn;
y = rsqrt( (1.0f + x)*(1.0f - x) );
x *= y;
if( x < 0.0f ){
x = -x;
sign = -1.0f;
} else {
sign = 1.0f;
}
if( x > 1.0f ){
x = 1.0f/x;
complement = 1.0f;
} else {
complement = 0.0f;
}
if( x > tantwelfthpi ){
x = (x - tansixthpi)/(1.0f + tansixthpi*x);
region = sixthpi;
} else {
region = 0.0f;
}
x2 = x*x;
y = (x*(c1 + x2*(c2 + x2*c3))/(c4 + x2*(c5 + x2*(c6 + x2))));
y += region;
y = (complement > 0.0f) ? (halfpi- y) : y;
acosVal = halfpi - sign*y;
}
kernel void kbonded_CDLJ (
float epsfac,
float xstrwidth,
float3 posq[][],
float charge<>,
float4 atoms<>,
float4 parm0<>,
float4 parm1<>,
float4 parm2<>,
float4 parm3<>,
float4 parm4<>,
out float3 fi<>,
out float3 fj<>,
out float3 fk<>,
out float3 fl<>
){
float3 rij, rkj, rkl, ril;
float2 ai, aj, ak, al;
float3 m, n;
float sgnphi;
float cosfac;
float ddphi, ddphi_rb;
float3 u, v, s;
float invlkj, invlij, invlkl, msq, nsq, cos_phi, sin_phi;
float3 uij, ukj, ukl;
float phi, mdphi;
float rij_d_ukj, ukj_d_rkl;
float normfac;
float qq;
float3 fi_ang, fj_ang, fk_ang;
float3 fi_bond;
float cii, ckk, cik;
float fs, st, sth;
float3 fi_pair;
float r2 ;
float torsion_numerator;
float theta1, costheta1, invsintheta1;
float theta2, costheta2, invsintheta2;
float3 posqi, posqj, posqk, posql;
float sig, eps;
// i-coordinates
ai.y = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
ai.x = atoms.x - ai.y * xstrwidth;
posqi = posq[ ai ];
// j-coordinates
if ( atoms.y > -0.5f ) {
aj.y = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
posqj = posq[ aj ];
} else {
posqj = float3( 0.0f, 1.0f, 0.0f );
}
// k-coordinates
if ( atoms.z > -0.5f ) {
ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
ak.x = atoms.z - ak.y * xstrwidth;
posqk = posq[ ak ];
} else {
posqk = float3( 1.0f, 1.0f, 0.0f );
}
// l-coordinates
if ( atoms.w > -0.5f ) {
al.y = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
al.x = atoms.w - al.y * xstrwidth;
posql = posq[ al ];
} else {
posql = float3( 1.0f, 1.0f, 1.0f );
}
rij = posqi - posqj;
rkj = posqk - posqj;
rkl = posqk - posql;
ril = posqi - posql;
invlij = rsqrt( dot( rij, rij ) );
invlkj = rsqrt( dot( rkj, rkj ) );
invlkl = rsqrt( dot( rkl, rkl ) );
uij = rij * invlij;
ukj = rkj * invlkj;
ukl = rkl * invlkl;
rij_d_ukj = dot( rij, ukj );
ukj_d_rkl = dot( ukj, rkl );
m = cross( uij, ukj );
n = cross( ukj, ukl );
costheta1 = clamp( rij_d_ukj * invlij, -1.0f, 1.0f );
//theta1 = acos( costheta1 );
kAcos( costheta1, theta1 );
invsintheta1 = rsqrt( (1.0f - costheta1) * (1.0f + costheta1) );
costheta2 = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
//theta2 = acos( costheta2 );
kAcos( costheta2, theta2 );
invsintheta2 = rsqrt( (1.0f - costheta2) * (1.0f + costheta2) );
cos_phi = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
sgnphi = sign( dot(rij, n) );
phi = sgnphi * acos( cos_phi );
mdphi = parm1.w * phi - parm1.z;
ddphi = -parm1.y * parm1.w * sin( mdphi );
cos_phi = -cos_phi;
sin_phi = -sgnphi*sqrt( (1.0f - cos_phi) * (1.0f + cos_phi) );
ddphi_rb = 5.0f * parm1.x;
ddphi_rb = 4.0f * parm0.w + ddphi_rb * cos_phi;
ddphi_rb = 3.0f * parm0.z + ddphi_rb * cos_phi;
ddphi_rb = 2.0f * parm0.y + ddphi_rb * cos_phi;
ddphi_rb = parm0.x + ddphi_rb * cos_phi;
ddphi_rb = -ddphi_rb * sin_phi;
ddphi += ddphi_rb;
/*
fi = float3( cos_phi, sgnphi, ddphi_rb );
fj = float3( sin_phi, parm1.x, parm0.w );
fk = float3( parm0.z, parm0.y, parm0.x );
fl = float3( ddphi, 0.0f, 0.0f );
*/
fi = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
fl = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
u = rij_d_ukj * invlkj * fi;
v = ukj_d_rkl * invlkj * fl;
s = u - v;
fj = s - fi;
fk = -(s + fl);
fs = -parm2.y * ( theta1 - parm2.x );
st = fs * invsintheta1;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta1;
cik = st * invlkj * invlij;
cii = sth * invlij;
ckk = sth * invlkj;
fi_ang = -( cik * rkj - cii * uij );
fk_ang = -( cik * rij - ckk * ukj );
fj_ang = -fi_ang - fk_ang;
fi += fi_ang;
fj += fj_ang;
fk += fk_ang;
fs = -parm2.w * ( theta2 - parm2.z );
st = fs * invsintheta2;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta2;
cik = st * invlkj * invlkl;
cii = sth * invlkj;
ckk = sth * invlkl;
fi_ang = ( cik * rkl - cii * ukj );
fk_ang = ( cik * rkj - ckk * ukl );
fj_ang = -fi_ang - fk_ang;
fj += fi_ang;
fk += fj_ang;
fl += fk_ang;
fi_bond = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
fi += fi_bond;
fj += -fi_bond;
fi_bond = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
fj += fi_bond;
fk += -fi_bond;
fi_bond = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
fk += fi_bond;
fl += -fi_bond;
if( parm4.z > -0.5f ){
r2 = dot( ril, ril );
fs = scalar_force_single_CDLJ( charge, epsfac, parm4.z, parm4.w, r2 );
fi_pair = fs * ril;
fi += fi_pair;
fl -= fi_pair;
//fi = float3( charge, parm4.z, parm4.w );
//fj = float3( 1.0f, 2.f, 3.0f );
/*
} else {
fi = float3( 0.0f, 0.f, 0.0f );
fj = float3( 0.0f, 0.f, 0.0f );
fk = float3( 0.0f, 0.f, 0.0f );
fl = float3( 0.0f, 0.f, 0.0f );
*/
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
//124 FLOPS [??? total]
kernel void bornSumInternal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
float iAtomicRadii, out float4 bornSum<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2, r2Inverse;
float4 l_ij, l_ij2;
float4 u_ij, u_ij2;
float4 t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
float4 diff_u2_l2;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) ); //20
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
mask = r2 < smallValue4 ? zero4 : one4;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 ); //4
rInverse = rsqrt( r2 ); //8
rPlusScaledJ = r + jAtomicRadiiScaled; //4
rMinusScaledJ = r - jAtomicRadiiScaled; //4
rMinusScaledJAbs = abs( rMinusScaledJ );
//40
// ---------------------------------------------------------------------------------------
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij; //4
l_ij2 = l_ij*l_ij; //4
u_ij = 1.0f/rPlusScaledJ; //4
u_ij2 = u_ij*u_ij; //4
diff_u2_l2 = u_ij2 - l_ij2; //4
logRatio = log(u_ij/l_ij); //4
//24
// ---------------------------------------------------------------------------------------
// terms associated w/ dL/dr & dU/dr are zero and not included
// Born radius term
bornSum = l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2; //10*4
t3 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4; //12 for true, but don't know how to average so saying 0
bornSum += t3; //4
bornSum *= mask; //4
//60
}
//???? FLOPS
kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth, float3 posq[][],
float2 scaledAtomicRadii[][],
out float4 bornForce<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
float tmp;
float tmp2;
// ---------------------------------------------------------------------------------------
// constants
const float I_Unroll = 4.0f;
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// fetch position, (atomic radius - dielectric offset )
i1pos = posq[ iAtom ];
i1AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtom.x += 1;
i2pos = posq[ iAtom ];
i2AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtom.x += 1;
i3pos = posq[ iAtom ];
i3AtomicRadii = scaledAtomicRadii[ iAtom ].y;
iAtom.x += 1;
i4pos = posq[ iAtom ];
i4AtomicRadii = scaledAtomicRadii[ iAtom ].y;
bornForce = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// inner loop indices setup
//changed the following instruction for rounding issues on some ASICs
//whichRep = floor( forceIndex / roundedUpAtoms );
tmp = fmod(forceIndex, roundedUpAtoms);
whichRep = round((forceIndex - tmp)/roundedUpAtoms);
jBlock = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j1pos = posq[ jAtom ];
jAtom.x += 1.0f;
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j2pos = posq[ jAtom ];
jAtom.x += 1.0f;
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j3pos = posq[ jAtom ];
jAtom.x += 1.0f;
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j4pos = posq[ jAtom ];
jAtom.x += 1.0f;
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
d1 = j1pos - i1pos; //3
d2 = j2pos - i1pos; //3
d3 = j3pos - i1pos; //3
d4 = j4pos - i1pos; //3
// i = 1
bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i1AtomicRadii, bornSum ); //112 : 368(?)
bornForce.x += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 2
d1 = j1pos - i2pos; //3
d2 = j2pos - i2pos; //3
d3 = j3pos - i2pos; //3
d4 = j4pos - i2pos; //3
bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i2AtomicRadii, bornSum );//112 : 368(?)
bornForce.y += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 3
d1 = j1pos - i3pos; //3
d2 = j2pos - i3pos; //3
d3 = j3pos - i3pos; //3
d4 = j4pos - i3pos; //3
bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i3AtomicRadii, bornSum );//112 : 368(?)
bornForce.z += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// i = 4
d1 = j1pos - i4pos; //3
d2 = j2pos - i4pos; //3
d3 = j3pos - i4pos; //3
d4 = j4pos - i4pos; //3
bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i4AtomicRadii, bornSum );//112 : 368(?)
bornForce.w += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
// ---------------------------------------------------------------------------------------
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
kernel void kPostCalculateBornRadii_nobranch(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float conversion,
float mergeNonObcForces,
float4 pstream1[][],
float atomicRadii<>,
out float bornRadii<>,
out float obcChain<> ){
// ---------------------------------------------------------------------------------------
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
float sum2, sum3, bornSum, tanhSum, atomicRadiiOffset, obcIntermediate;
float4 o1;
float tmp;
float2 iAtom;
float4 forces;
float expPlus, expMinus;
// ---------------------------------------------------------------------------------------
// constants -- OBC Type II
const float alphaObc = 1.0f;
const float betaObc = 0.8f;
const float gammaObc = 4.85f;
const float dielectricOffset = 0.009f;
// ---------------------------------------------------------------------------------------
// given atom index find force indices and streams
pindex = indexof( obcChain );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
// add current forces in inStream to forces stored in pstreams
// the .w entry is Born sum values; it will be used to calculate the
// Born radii and obcChain term
bornSum = 0.0f;
// sum over j-loop 'duplications' by gathering from pstreams
for( i = 0.0f; i < repfac; i += 1.0f ){
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
pindex.x = qIndex - pindex.y*pStreamWidth;
o1 = pstream1[ pindex ];
tmp = qOff < 0.5f ? o1.x : o1.y;
tmp = qOff < 1.5f ? tmp : o1.z;
tmp = qOff < 2.5f ? tmp : o1.w;
bornSum += tmp;
forceIndex += roundNatoms;
}
// compute Born radii and ObcChain
atomicRadiiOffset = atomicRadii - dielectricOffset;
bornSum *= 0.5f*atomicRadiiOffset;
sum2 = bornSum*bornSum;
sum3 = bornSum*sum2;
// Tanh does not exist?
// calculate [ exp(x) - exp(-x) ]/[ exp(x) + exp(-x) ]
// tanhSum = tanh( bornSum - betaObc*sum2 + gammaObc*sum3 );
tanhSum = bornSum - betaObc*sum2 + gammaObc*sum3;
expPlus = exp( tanhSum );
expMinus = 1.0f/expPlus;
tanhSum = ( expPlus - expMinus )/( expPlus + expMinus );
bornRadii = 1.0f/( (1.0f/(atomicRadiiOffset)) - tanhSum/atomicRadii );
obcIntermediate = atomicRadiiOffset*( alphaObc - 2.0f*betaObc*bornSum + 3.0f*gammaObc*sum2 );
obcChain = (1.0f - tanhSum*tanhSum)*obcIntermediate/atomicRadii;
if( atomIndex >= natoms ){
bornRadii = 0.0f;
obcChain = 0.0f;
}
}
kernel void loop1Internal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jBornR,
float4 jQ, float iBornR, float iQ, out float4 dGpol_dr<>,
out float4 dGpol_dalpha2_ij<> ){
// ---------------------------------------------------------------------------------------
float4 r2, alpha2_ij, D_ij, expTerm, denominator2, denominator, Gpol;
// ---------------------------------------------------------------------------------------
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
alpha2_ij = jBornR*iBornR;
D_ij = r2/(4.0f*alpha2_ij);
expTerm = exp( -D_ij );
denominator2 = r2 + alpha2_ij*expTerm;
denominator = sqrt( denominator2 );
Gpol = jQ/denominator;
Gpol *= iQ;
dGpol_dr = -Gpol*( 1.0f - 0.25f*expTerm )/denominator2;
dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*( 1.0f + D_ij )*jBornR/denominator2;
}
/* ---------------------------------------------------------------------------------------
Calculate nonpolar ACE term (Simbios)
bornRadius: Born radius
vdwRadius: Vdw radius
duplicationFactor: duplication factor
aceForce: ACE term
--------------------------------------------------------------------------------------- */
kernel void kAceNonPolarLoop1( float iBornRadius, float iVdwRadius, float duplicationFactor,
out float aceForce<> ){
// ---------------------------------------------------------------------------------------
// nonpolar term
float iSurface;
float iAceTerm;
// ---------------------------------------------------------------------------------------
// constants
// solvent radius
const float probeRadius = 0.14f;
// PI*4*6*0.0054*1000 (0.0054=asolv from Tinker)
//const float PI_24_aI = -0.3694512961;
const float PI_24_aI = -407.1504079f;
// ---------------------------------------------------------------------------------------
// etch i position and partial charge
// e = ai * term * (ri+probe)**2 * (ri/rb)**6
// (drbi) = drb(i) - 6.0fd0*e/rb
// (rI+probe)**2
iSurface = (iVdwRadius+probeRadius);
iSurface = iSurface*iSurface;
// (rI/rB)**6
iAceTerm = iVdwRadius/iBornRadius;
iAceTerm = iAceTerm*iAceTerm*iAceTerm;
iAceTerm = iAceTerm*iAceTerm;
aceForce = iSurface*iAceTerm*PI_24_aI/(duplicationFactor*iBornRadius);
}
/* ---------------------------------------------------------------------------------------
Calculate first loop force terms (Simbios)
numberOfAtoms: no. of atoms
roundedUpAtoms: rounded up number of atoms -- accounts for unrolling
duplicationFactor: number of threads for inner loop
streamWidth: atom stream width
fstreamWidth: force stream width (output -- i-unroll)
soluteDielectric: solute dielectric
solventDielectric: solvent dielectric
includeAce: include ACE term
posq: atom positions and charge
bornRadii: Born radii
nonpolarForce: nonpolar force (0 if nonpolar not included, else
ACE value)
bornForce1: i-unroll first force component, including dBornR/dr in .w
bornForce2: i-unroll second force component, including dBornR/dr in .w
bornForce3: i-unroll first force component, including dBornR/dr in .w
bornForce4: i-unroll second force component, including dBornR/dr in .w
--------------------------------------------------------------------------------------- */
kernel void kObcLoop1( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth, float soluteDielectric,
float solventDielectric, float includeAce,
float3 posq[][], float bornRadii[][], float2 atomicRadii[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// Born radii
float i1BornR, i2BornR, i3BornR, i4BornR;
float j1BornR, j2BornR, j3BornR, j4BornR;
float4 jBornR;
// atomic radii
float i1AtomicR, i2AtomicR, i3AtomicR, i4AtomicR;
// i,j coordinates
float3 i1Pos, i2Pos, i3Pos, i4Pos;
float3 j1Pos, j2Pos, j3Pos, j4Pos;
float4 j1PosQ, j2PosQ, j3PosQ, j4PosQ;
// i, j partial charges
float i1Q, i2Q, i3Q, i4Q;
float j1Q, j2Q, j3Q, j4Q;
float4 jQ;
float aceForce;
// delta coordinates
float3 d1, d2, d3, d4;
// intermediate terms
float4 dGpol_dr, dGpol_dalpha2_ij;
// indices
float2 iAtom;
float forceIndex;
// This is forceIndex mod numberOfAtoms, the true i index
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
float tmp;
// ---------------------------------------------------------------------------------------
// electricConstant = -166.0f2691;
// preFactor = 2.0f*electricConstant*(1.0f - (1.0f/waterDielectric))
float preFactor = -332.05382f;
const float I_Unroll = 4.0f;
const float3 zero3 = float3( 0.0f, 0.0f, 0.0f );
// ---------------------------------------------------------------------------------------
preFactor *= ( (1.0f/soluteDielectric) - (1.0f/solventDielectric) );
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather index
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - fmod(iAtomLinearIndex, streamWidth ))/streamWidth );
// ---------------------------------------------------------------------------------------
// etch i1 position and partial charge
jQ = posq[ iAtom ];
i1Pos = jQ.xyz;
i1Q = atomicRadii[ iAtom ].y;
i1Q *= preFactor;
i1BornR = bornRadii[ iAtom ];
i1AtomicR = atomicRadii[ iAtom ].x;
kAceNonPolarLoop1( i1BornR, i1AtomicR, duplicationFactor, aceForce );
bornForce1.xyz = zero3;
bornForce1.w = includeAce > 0.5f ? aceForce : 0.0f;
// ---------------------------------------------------------------------------------------
// etch i2 position and partial charge
iAtom.x += 1;
jQ = posq[ iAtom ];
i2Pos = jQ.xyz;
i2Q = atomicRadii[ iAtom ].y;
i2Q *= preFactor;
i2BornR = bornRadii[ iAtom ];
i2AtomicR = atomicRadii[ iAtom ].x;
kAceNonPolarLoop1( i2BornR, i2AtomicR, duplicationFactor, aceForce );
bornForce2.xyz = zero3;
bornForce2.w = includeAce > 0.5f ? aceForce : 0.0f;
// ---------------------------------------------------------------------------------------
// etch i3 position and partial charge
iAtom.x += 1;
jQ = posq[ iAtom ];
i3Pos = jQ.xyz;
i3Q = atomicRadii[ iAtom ].y;
i3Q *= preFactor;
i3BornR = bornRadii[ iAtom ];
i3AtomicR = atomicRadii[ iAtom ].x;
kAceNonPolarLoop1( i3BornR, i3AtomicR, duplicationFactor, aceForce );
bornForce3.xyz = zero3;
bornForce3.w = includeAce > 0.5f ? aceForce : 0.0f;
// ---------------------------------------------------------------------------------------
// etch i4 position and partial charge
iAtom.x += 1;
jQ = posq[ iAtom ];
i4Pos = jQ.xyz;
i4Q = atomicRadii[ iAtom ].y;
i4Q *= preFactor;
i4BornR = bornRadii[ iAtom ];
i4AtomicR = atomicRadii[ iAtom ].x;
kAceNonPolarLoop1( i4BornR, i4AtomicR, duplicationFactor, aceForce );
bornForce4.xyz = zero3;
bornForce4.w = includeAce > 0.5f ? aceForce : 0.0f;
// ---------------------------------------------------------------------------------------
// inner loop setup
// if dupFac == 4, I_UnRoll =2, then breaking inner loop into two segments
// to increase number of threads in flight
// forceStreamSz = N*RepFac/I_UnRoll
// forceIndex = I_UnRoll*( a.x + a.y*forceStreamSz )
// whichRep = 0 or 1
// jBlock = 1 + floor[ N/(duplicationFactor*streamWidth) ]
//changed the following instruction for rounding issues on some ASICs
//whichRep = floor( forceIndex / roundedUpAtoms );
tmp = fmod(forceIndex, roundedUpAtoms);
whichRep = round((forceIndex - tmp)/roundedUpAtoms);
jBlock = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ) {
// ---------------------------------------------------------------------------------------
// gather required values
j1Pos = posq[ jAtom ];
j1Q = atomicRadii[ jAtom ].y;
j1BornR = bornRadii[ jAtom ];
jAtom.x += 1.0f;
j2Pos = posq[ jAtom ];
j2Q = atomicRadii[ jAtom ].y;
j2BornR = bornRadii[ jAtom ];
jAtom.x += 1.0f;
j3Pos = posq[ jAtom ];
j3Q = atomicRadii[ jAtom ].y;
j3BornR = bornRadii[ jAtom ];
jAtom.x += 1.0f;
j4Pos = posq[ jAtom ];
j4Q = atomicRadii[ jAtom ].y;
j4BornR = bornRadii[ jAtom ];
jAtom.x += 1.0f;
jBornR = float4( j1BornR, j2BornR, j3BornR, j4BornR );
jQ = float4( j1Q, j2Q, j3Q, j4Q );
// ---------------------------------------------------------------------------------------
// i == 1
d1 = i1Pos - j1Pos;
d2 = i1Pos - j2Pos;
d3 = i1Pos - j3Pos;
d4 = i1Pos - j4Pos;
loop1Internal( d1, d2, d3, d4, jBornR, jQ, i1BornR, i1Q, dGpol_dr, dGpol_dalpha2_ij );
bornForce1.xyz += dGpol_dr.x*d1;
bornForce1.xyz += dGpol_dr.y*d2;
bornForce1.xyz += dGpol_dr.z*d3;
bornForce1.xyz += dGpol_dr.w*d4;
bornForce1.w += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
// ---------------------------------------------------------------------------------------
// i == 2
d1 = i2Pos - j1Pos;
d2 = i2Pos - j2Pos;
d3 = i2Pos - j3Pos;
d4 = i2Pos - j4Pos;
loop1Internal( d1, d2, d3, d4, jBornR, jQ, i2BornR, i2Q, dGpol_dr, dGpol_dalpha2_ij );
bornForce2.xyz += dGpol_dr.x*d1;
bornForce2.xyz += dGpol_dr.y*d2;
bornForce2.xyz += dGpol_dr.z*d3;
bornForce2.xyz += dGpol_dr.w*d4;
bornForce2.w += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
// ---------------------------------------------------------------------------------------
// i == 3
d1 = i3Pos - j1Pos;
d2 = i3Pos - j2Pos;
d3 = i3Pos - j3Pos;
d4 = i3Pos - j4Pos;
loop1Internal( d1, d2, d3, d4, jBornR, jQ, i3BornR, i3Q, dGpol_dr, dGpol_dalpha2_ij );
bornForce3.xyz += dGpol_dr.x*d1;
bornForce3.xyz += dGpol_dr.y*d2;
bornForce3.xyz += dGpol_dr.z*d3;
bornForce3.xyz += dGpol_dr.w*d4;
bornForce3.w += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
// ---------------------------------------------------------------------------------------
// i == 4
d1 = i4Pos - j1Pos;
d2 = i4Pos - j2Pos;
d3 = i4Pos - j3Pos;
d4 = i4Pos - j4Pos;
loop1Internal( d1, d2, d3, d4, jBornR, jQ, i4BornR, i4Q, dGpol_dr, dGpol_dalpha2_ij );
bornForce4.xyz += dGpol_dr.x*d1;
bornForce4.xyz += dGpol_dr.y*d2;
bornForce4.xyz += dGpol_dr.z*d3;
bornForce4.xyz += dGpol_dr.w*d4;
bornForce4.w += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
// ---------------------------------------------------------------------------------------
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
// The inner loop by definition creates divergent paths. Chances are
// fair that we will take all sides of the branch anyway, so this
// verion uses manual predication
kernel void kPostObcLoop1_nobranch(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
float obcChain<>,
float bornRadii<>,
out float4 outstream<>,
out float bornRadii2Force<> )
{
// ---------------------------------------------------------------------------------------
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
float4 o1,o2,o3,o4;
float4 tmp;
// given atom index find force indices and streams
pindex = indexof( outstream );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
for( i = 0.0f; i < repfac; i += 1.0f ){
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
pindex.x = qIndex - pindex.y*pStreamWidth;
o1 = pstream1[ pindex ];
o2 = pstream2[ pindex ];
o3 = pstream3[ pindex ];
o4 = pstream4[ pindex ];
tmp = qOff < 0.5f ? o1 : o2;
tmp = qOff < 1.5f ? tmp : o3;
tmp = qOff < 2.5f ? tmp : o4;
outstream += tmp;
forceIndex += roundNatoms;
}
bornRadii2Force = obcChain*bornRadii*bornRadii*outstream.w;
}
//156 FLOPS [172 total]
kernel void loop2Internal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2, r2Inverse;
float4 l_ij, l_ij2;
float4 u_ij, u_ij2;
float4 t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
float4 diff_u2_l2;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) ); //20
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
mask = r2 < smallValue4 ? zero4 : one4;
// r2 = r2 < smallValue4 ? bigValue4 : r2;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 ); //4
rInverse = rsqrt( r2 ); //8
r2Inverse = rInverse*rInverse; //4
rPlusScaledJ = r + jAtomicRadiiScaled; //4
rMinusScaledJ = r - jAtomicRadiiScaled; //4
rMinusScaledJAbs = abs( rMinusScaledJ );
// ---------------------------------------------------------------------------------------
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij; //4
l_ij2 = l_ij*l_ij; //4
u_ij = 1.0f/rPlusScaledJ; //4
u_ij2 = u_ij*u_ij; //4
diff_u2_l2 = u_ij2 - l_ij2; //4
scaledRadiusJ2 = jAtomicRadiiScaled*jAtomicRadiiScaled; //4
logRatio = log(u_ij/l_ij); //4
// terms associated w/ dL/dr & dU/dr are zero and not included
t3 = -0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*diff_u2_l2 + 0.25f*logRatio*r2Inverse; //7*4
de = mask*bornForce*t3*rInverse; //12
// de = float4( 1.0f, 1.0f, 1.0f, 1.0f );
// de = scaledRadiusJ2;
// de = mask*t3;
// de = mask*rPlusScaledJ;
// de = mask*r2;
// de = mask;
// de = mask*rInverse;
// de = mask*bornForce;
// Born radius term
// bornSum = l_ij - u_ij + 0.25*r*diff_u2_l2 + 0.5f*rInverse*logRatio - (0.25*jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
/*
bornSum = l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2; //10*4
t3 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4; //12 for true, but don't know how to average so saying 0
bornSum += t3; //4
bornSum *= mask; //4
*/
}
kernel void loop2InternalR2( float4 r2, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<>, out float4 bornSum<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2Inverse;
float4 l_ij, l_ij2;
float4 u_ij, u_ij2;
float4 t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
float4 diff_u2_l2;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
mask = r2 < smallValue4 ? zero4 : one4;
// r2 = r2 < smallValue4 ? bigValue4 : r2;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 );
rInverse = rsqrt( r2 );
r2Inverse = rInverse*rInverse;
rPlusScaledJ = r + jAtomicRadiiScaled;
rMinusScaledJ = r - jAtomicRadiiScaled;
rMinusScaledJAbs = abs( rMinusScaledJ );
// ---------------------------------------------------------------------------------------
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij;
l_ij2 = l_ij*l_ij;
u_ij = 1.0f/rPlusScaledJ;
u_ij2 = u_ij*u_ij;
diff_u2_l2 = u_ij2 - l_ij2;
scaledRadiusJ2 = jAtomicRadiiScaled*jAtomicRadiiScaled;
logRatio = log(u_ij/l_ij);
// terms associated w/ dL/dr & dU/dr are zero and not included
t3 = -0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*diff_u2_l2 + 0.25f*logRatio*r2Inverse;
de = mask*bornForce*t3*rInverse;
// de = float4( 1.0f, 1.0f, 1.0f, 1.0f );
// de = scaledRadiusJ2;
// de = mask*t3;
// de = mask*rPlusScaledJ;
// de = mask*r2;
// de = mask;
// de = mask*rInverse;
// de = mask*bornForce;
// Born radius term
// bornSum = l_ij - u_ij + 0.25*r*diff_u2_l2 + 0.5f*rInverse*logRatio - (0.25*jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
bornSum = l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
t3 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4;
bornSum += t3;
bornSum *= mask;
}
/* This is a copy of loop2Internal(), but w/ the calculation of bornSum removed */
//112 FLOPS
kernel void loop2InternalNoSum( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
float bornForce, float iAtomicRadii, out float4 de<> ){
// ---------------------------------------------------------------------------------------
float4 r, rInverse, r2, r2Inverse;
float4 l_ij, l_ij2;
float4 u_ij, u_ij2;
float4 t3;
float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
float4 iAtomicRadii4;
const float bigValue = 1000000.0f;
const float4 bigValue4 = float4( bigValue, bigValue, bigValue, bigValue );
const float smallValue = 0.000001f;
const float4 smallValue4 = float4( smallValue, smallValue, smallValue, smallValue );
const float4 zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
const float4 one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
float4 mask;
// ---------------------------------------------------------------------------------------
iAtomicRadii4 = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) ); //20
// ---------------------------------------------------------------------------------------
// not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
// mask = r2 < smallValue4 ? zero4 : one4;
// r2 = r2 < smallValue4 ? bigValue4 : r2;
// ---------------------------------------------------------------------------------------
r = sqrt( r2 ); //4
rInverse = rsqrt( r2 ); //8
r2Inverse = rInverse*rInverse; //4
rPlusScaledJ = r + jAtomicRadiiScaled; //4
rMinusScaledJ = r - jAtomicRadiiScaled; //4
rMinusScaledJAbs = abs( rMinusScaledJ );
// ---------------------------------------------------------------------------------------
// mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : mask;
mask = iAtomicRadii4 >= rPlusScaledJ ? zero4 : one4;
l_ij = iAtomicRadii4 > rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
// ---------------------------------------------------------------------------------------
l_ij = 1.0f/l_ij; //4
l_ij2 = l_ij*l_ij; //4
u_ij = 1.0f/rPlusScaledJ; //4
u_ij2 = u_ij*u_ij; //4
scaledRadiusJ2 = jAtomicRadiiScaled*jAtomicRadiiScaled; //4
logRatio = log(u_ij/l_ij); //4
// terms associated w/ dL/dr & dU/dr are zero and not included
t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse; //8*4
de = mask*bornForce*t3*rInverse; //12
}
/* ---------------------------------------------------------------------------------------
Calculate Loop 2 OBC forces (Simbios)
numberOfAtoms: no. of atoms
roundedUpAtoms: rounded up number of atoms based on unroll
duplicationFactor: ?
strheight: atom stream height
streamWidth: atom stream width
fstreamWidth: force stream width (output -- i-unroll)
posq: atom positions and charge
atomicRadii: atomic radii
scaledAtomicRadii: scaled atomic radii
bornForceFactor: (Born radii)**2*bornForce*Obcforce
bornForce1: i-unroll first force component, including dBorn_r/dr in .w
bornForce2: i-unroll second force component, including dBorn_r/dr in .w
bornForce3: i-unroll third force component, including dBorn_r/dr in .w
bornForce4: i-unroll fourth force component, including dBorn_r/dr in .w
#endif
--------------------------------------------------------------------------------------- */
//1376 FLOPS
kernel void kObcLoop2( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth, float3 posq[][],
float2 scaledAtomicRadii[][], float bornForceFactor[][],
out float4 bornForce1<>, out float4 bornForce2<>,
out float4 bornForce3<>, out float4 bornForce4<> ){
// ---------------------------------------------------------------------------------------
// atomic radii: radius - dielectric offset
float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
float4 iAtomicRadii, jAtomicRadii;
// scaled atomic radii: scaleFactor*( radius - dielectric offset )
float i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
float j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
float4 iScaledAtomicRadii, jScaledAtomicRadii;
// (Born radii**2)*BornForce*ObcChainForce
float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
//float4 iBornForceFactor;
//float4 jBornForceFactor;
// i,j coordinates
float3 i1pos, i2pos, i3pos, i4pos;
float3 j1pos, j2pos, j3pos, j4pos;
// delta coordinates
float3 d1, d2, d3, d4;
// indices
float2 iAtom;
float forceIndex;
//This is forceIndex mod numberOfAtoms, the true i index
float4 de, bornSum;
float iAtomLinearIndex, jLinind;
float2 jAtom;
float jEnd, jStart, jBlock;
float whichRep;
float tmp;
float tmp2;
// ---------------------------------------------------------------------------------------
// constants
const float I_Unroll = 4.0f;
// ---------------------------------------------------------------------------------------
// set linear index
iAtom = indexof( bornForce1 );
forceIndex = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
iAtomLinearIndex = fmod( forceIndex, roundedUpAtoms );
// ---------------------------------------------------------------------------------------
// set gather coordinates
iAtom.x = fmod( iAtomLinearIndex, streamWidth );
iAtom.y = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
// ---------------------------------------------------------------------------------------
// etch i1 position, Born prefactor, atomic radius
i1pos = posq[ iAtom ];
i1BornForceFactor = bornForceFactor[ iAtom ];
i1AtomicRadii = scaledAtomicRadii[ iAtom ].y;
i1ScaledAtomicRadii = scaledAtomicRadii[ iAtom ].x;
bornForce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i2 position, Born prefactor, atomic radius
iAtom.x += 1;
i2pos = posq[ iAtom ];
i2BornForceFactor = bornForceFactor[ iAtom ];
i2AtomicRadii = scaledAtomicRadii[ iAtom ].y;
i2ScaledAtomicRadii = scaledAtomicRadii[ iAtom ].x;
bornForce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i3 position, Born prefactor, atomic radius
iAtom.x += 1;
i3pos = posq[ iAtom ];
i3BornForceFactor = bornForceFactor[ iAtom ];
i3AtomicRadii = scaledAtomicRadii[ iAtom ].y;
i3ScaledAtomicRadii = scaledAtomicRadii[ iAtom ].x;
bornForce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// etch i4 position, Born prefactor, atomic radius
iAtom.x += 1;
i4pos = posq[ iAtom ];
i4BornForceFactor = bornForceFactor[ iAtom ];
i4AtomicRadii = scaledAtomicRadii[ iAtom ].y;
i4ScaledAtomicRadii = scaledAtomicRadii[ iAtom ].x;
bornForce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// create float4 for main vars
iAtomicRadii = float4( i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii );
iScaledAtomicRadii = float4( i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
// inner loop indices setup
//changed the following instruction for rounding issues on some ASICs
//whichRep = floor( forceIndex / roundedUpAtoms );
tmp = fmod(forceIndex, roundedUpAtoms);
whichRep = round((forceIndex - tmp)/roundedUpAtoms);
jBlock = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) );
jStart = whichRep*jBlock;
jEnd = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
jAtom.y = jStart;
jLinind = jAtom.y*streamWidth;
// ---------------------------------------------------------------------------------------
while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind ) > 0.9f ){
jAtom.x = 0.0f;
while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){
// ---------------------------------------------------------------------------------------
// gather required values
j1BornForceFactor = bornForceFactor[ jAtom ];
j1AtomicRadii = scaledAtomicRadii[ jAtom ].y;
j1ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j1pos = posq[ jAtom ];
jAtom.x += 1.0f;
j2BornForceFactor = bornForceFactor[ jAtom ];
j2AtomicRadii = scaledAtomicRadii[ jAtom ].y;
j2ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j2pos = posq[ jAtom ];
jAtom.x += 1.0f;
j3BornForceFactor = bornForceFactor[ jAtom ];
j3AtomicRadii = scaledAtomicRadii[ jAtom ].y;
j3ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j3pos = posq[ jAtom ];
jAtom.x += 1.0f;
j4BornForceFactor = bornForceFactor[ jAtom ];
j4AtomicRadii = scaledAtomicRadii[ jAtom ].y;
j4ScaledAtomicRadii = scaledAtomicRadii[ jAtom ].x;
j4pos = posq[ jAtom ];
jAtom.x += 1.0f;
jAtomicRadii = float4( j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii );
jScaledAtomicRadii = float4( j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
// ---------------------------------------------------------------------------------------
// First set of 4 -- i == 1
d1 = j1pos - i1pos; //3
d2 = j2pos - i1pos; //3
d3 = j3pos - i1pos; //3
d4 = j4pos - i1pos; //3
// i = 1
loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, de ); //156 : 368
bornForce1.xyz += de.x*d1; //3*2=6
bornForce1.xyz += de.y*d2; //3*2=6
bornForce1.xyz += de.z*d3; //3*2=6
bornForce1.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// i = 2
d1 = j1pos - i2pos; //3
d2 = j2pos - i2pos; //3
d3 = j3pos - i2pos; //3
d4 = j4pos - i2pos; //3
loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, de );//156 : 368
bornForce2.xyz += de.x*d1; //3*2=6
bornForce2.xyz += de.y*d2; //3*2=6
bornForce2.xyz += de.z*d3; //3*2=6
bornForce2.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// i = 3
d1 = j1pos - i3pos; //3
d2 = j2pos - i3pos; //3
d3 = j3pos - i3pos; //3
d4 = j4pos - i3pos; //3
loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, de );//156 : 368
bornForce3.xyz += de.x*d1; //3*2=6
bornForce3.xyz += de.y*d2; //3*2=6
bornForce3.xyz += de.z*d3; //3*2=6
bornForce3.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// i = 4
d1 = j1pos - i4pos; //3
d2 = j2pos - i4pos; //3
d3 = j3pos - i4pos; //3
d4 = j4pos - i4pos; //3
loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, de );//156 : 366
bornForce4.xyz += de.x*d1; //3*2=6
bornForce4.xyz += de.y*d2; //3*2=6
bornForce4.xyz += de.z*d3; //3*2=6
bornForce4.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// j = 1
d1 = j1pos - i1pos; //3
d2 = j1pos - i2pos; //3
d3 = j1pos - i3pos; //3
d4 = j1pos - i4pos; //3
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de ); //112 : 304
bornForce1.xyz += de.x*d1; //3*2=6
bornForce2.xyz += de.y*d2; //3*2=6
bornForce3.xyz += de.z*d3; //3*2=6
bornForce4.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// j = 2
d1 = j2pos - i1pos; //3
d2 = j2pos - i2pos; //3
d3 = j2pos - i3pos; //3
d4 = j2pos - i4pos; //3
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de ); //112 : 304
bornForce1.xyz += de.x*d1; //3*2=6
bornForce2.xyz += de.y*d2; //3*2=6
bornForce3.xyz += de.z*d3; //3*2=6
bornForce4.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// j = 3
d1 = j3pos - i1pos; //3
d2 = j3pos - i2pos; //3
d3 = j3pos - i3pos; //3
d4 = j3pos - i4pos; //3
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de ); //112 : 304
bornForce1.xyz += de.x*d1; //3*2=6
bornForce2.xyz += de.y*d2; //3*2=6
bornForce3.xyz += de.z*d3; //3*2=6
bornForce4.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// j = 4
d1 = j4pos - i1pos; //3
d2 = j4pos - i2pos; //3
d3 = j4pos - i3pos; //3
d4 = j4pos - i4pos; //3
loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de ); //112 : 304
bornForce1.xyz += de.x*d1; //3*2=6
bornForce2.xyz += de.y*d2; //3*2=6
bornForce3.xyz += de.z*d3; //3*2=6
bornForce4.xyz += de.w*d4; //3*2=6
// ---------------------------------------------------------------------------------------
// increment linear index by 4
jLinind += 4.0f;
}
jAtom.y += 1.0f;
}
}
kernel void kPostObcLoop2_nobranch(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float conversion,
float mergeNonObcForces,
float4 inObcForces<>,
float3 nonObcForces<>,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
float atomicRadii<>,
out float bornRadii<>,
out float obcChain<>,
out float3 outForces<> ){
// ---------------------------------------------------------------------------------------
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
float sum2, sum3, bornSum, tanhSum, atomicRadiiOffset, obcIntermediate;
float4 o1,o2,o3,o4;
float4 tmp;
float2 iAtom;
float4 forces;
float expPlus, expMinus;
// ---------------------------------------------------------------------------------------
// constants -- OBC Type II
const float alphaObc = 1.0f;
const float betaObc = 0.8f;
const float gammaObc = 4.85f;
const float dielectricOffset = 0.009f;
// ---------------------------------------------------------------------------------------
// given atom index find force indices and streams
pindex = indexof( outForces );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
// add current forces in inStream to forces stored in pstreams
// the .w entry is Born sum values; it will be used to calculate the
// Born radii and obcChain term
forces = inObcForces;
forces.w = 0.0f;
//forces = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// sum over j-loop 'duplications' by gathering from pstreams
for( i = 0.0f; i < repfac; i += 1.0f ){
// qIndex = floor( forceIndex/iUnroll );
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
// pindex.y = floor( qIndex/ pStreamWidth );
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
// pindex.x = qIndex - pindex.y*pStreamWidth + qOff;
pindex.x = qIndex - pindex.y*pStreamWidth;
o1 = pstream1[ pindex ];
o2 = pstream2[ pindex ];
o3 = pstream3[ pindex ];
o4 = pstream4[ pindex ];
tmp = qOff < 0.5f ? o1 : o2;
tmp = qOff < 1.5f ? tmp : o3;
tmp = qOff < 2.5f ? tmp : o4;
forces += tmp;
forceIndex += roundNatoms;
}
// compute Born radii and ObcChain
atomicRadiiOffset = atomicRadii - dielectricOffset;
bornSum = forces.w;
bornSum *= 0.5f*atomicRadiiOffset;
sum2 = bornSum*bornSum;
sum3 = bornSum*sum2;
// Tanh does not exist?
// calculate [ exp(x) - exp(-x) ]/[ exp(x) + exp(-x) ]
// tanhSum = tanh( bornSum - betaObc*sum2 + gammaObc*sum3 );
tanhSum = bornSum - betaObc*sum2 + gammaObc*sum3;
expPlus = exp( tanhSum );
expMinus = 1.0f/expPlus;
tanhSum = ( expPlus - expMinus )/( expPlus + expMinus );
bornRadii = 1.0f/( (1.0f/(atomicRadiiOffset)) - tanhSum/atomicRadii );
obcIntermediate = atomicRadiiOffset*( alphaObc - 2.0f*betaObc*bornSum + 3.0f*gammaObc*sum2 );
obcChain = (1.0f - tanhSum*tanhSum)*obcIntermediate/atomicRadii;
if( atomIndex >= natoms ){
bornRadii = 0.0f;
obcChain = 0.0f;
}
// add converted new forces to non-Obc forces
outForces = conversion*forces.xyz;
if( mergeNonObcForces > 0.1f ){
outForces += nonObcForces;
}
}
#ifndef __KGBSA_H__
#define __KGBSA_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
void kMergeFloat4(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float iUnroll,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream outputStream
);
void kMergeFloat4_4(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream stream3,
::brook::stream stream4,
::brook::stream outputStream
);
void kMergeFloat4_4X(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream stream3,
::brook::stream stream4,
::brook::stream outputStream
);
void kCheck(
const float natoms,
const float atomStrWidth,
const float pforceStrWidth,
const float unroll,
::brook::stream stream1
);
void kAddAndMergeFloat4(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float iUnroll,
::brook::stream inStream,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream outputStream
);
void kAddAndMergeFloat4_4(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
::brook::stream inStream,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream stream3,
::brook::stream stream4,
::brook::stream outputStream
);
void kPostObcLoop2(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
const float conversion,
const float mergeNonObcForces,
::brook::stream inObcForces,
::brook::stream nonObcForces,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream stream3,
::brook::stream stream4,
::brook::stream atomicRadii,
::brook::stream bornRadii,
::brook::stream obcChain,
::brook::stream outputStream
);
void kPostObcLoop2_nobranch(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
const float conversion,
const float mergeNonObcForces,
::brook::stream inObcForces,
::brook::stream nonObcForces,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream stream3,
::brook::stream stream4,
::brook::stream atomicRadii,
::brook::stream bornRadii,
::brook::stream obcChain,
::brook::stream outputStream
);
void kPostCalculateBornRadii_nobranch(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
const float conversion,
const float mergeNonObcForces,
::brook::stream stream1,
::brook::stream atomicRadii,
::brook::stream bornRadii,
::brook::stream obcChain
);
void kPreGbsaForce2(
::brook::stream intermediateForceIn,
::brook::stream bornRadii,
::brook::stream bornRadii2Force
);
void kPreObcForce2(
::brook::stream intermediateForceIn,
::brook::stream obcChainForceIn,
::brook::stream bornRadii,
::brook::stream bornRadii2Force
);
void kPostObcLoop1(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream stream3,
::brook::stream stream4,
::brook::stream obcChainForceIn,
::brook::stream bornRadii,
::brook::stream outputStream,
::brook::stream bornRadii2Force
);
void kPostObcLoop1_nobranch(
const float repfac,
const float atomStrWidth,
const float pforceStrWidth,
const float natoms,
const float roundNatoms,
const float iUnroll,
::brook::stream stream1,
::brook::stream stream2,
::brook::stream stream3,
::brook::stream stream4,
::brook::stream obcChainForceIn,
::brook::stream bornRadii,
::brook::stream outputStream,
::brook::stream bornRadii2Force
);
void kCopyFloat4(
::brook::stream stream1,
::brook::stream stream2
);
void kObcLoop1(
const float natoms,
const float roundedUpAtoms,
const float dupfac,
const float strwidth,
const float fstrwidth,
const float soluteDielectric,
const float solventDielectric,
const float includeAce,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream atomicRadii,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream bornForce3,
::brook::stream bornForce4
);
void kCalculateGbsaForce1_4 (
const float natoms,
const float roundedUpAtoms,
const float dupfac,
const float strwidth,
const float fstrwidth,
const float soluteDielectric,
const float solventDielectric,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream nonpolarForce,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream bornForce3,
::brook::stream bornForce4
);
void kCalculateGbsaForce1_4X (
const float natoms,
const float roundedUpAtoms,
const float dupfac,
const float strwidth,
const float fstrwidth,
const float soluteDielectric,
const float solventDielectric,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream nonpolarForce,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream bornForce3,
::brook::stream bornForce4
);
void kCalculateGbsaForce2_4 (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2
);
void kCalculateGbsaForce2_4Debug (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
const float debugJAtom,
const float debugReport,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream debugStream1,
::brook::stream debugStream2,
::brook::stream debugStream3,
::brook::stream debugStream4
);
void kCalculateGbsaForce2_4Excl (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excl,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2
);
void kCalculateGbsaForce2_4ExclDebug (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
const float debugJAtom,
const float debugReport,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excl,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream debugStream1,
::brook::stream debugStream2,
::brook::stream debugStream3,
::brook::stream debugStream4
);
void kCalculateGbsaForce2_2 (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2
);
void kCalculateGbsaForce2_2Debug (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
const float debugJAtom,
const float debugReport,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream debugStream1,
::brook::stream debugStream2,
::brook::stream debugStream3,
::brook::stream debugStream4
);
void kCalculateGbsaForce2_2Excl (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excl,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2
);
void kCalculateGbsaForce2_2ExclDebug (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
const float debugJAtom,
const float debugReport,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excl,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream debugStream1,
::brook::stream debugStream2,
::brook::stream debugStream3,
::brook::stream debugStream4
);
void kBornRadii(
::brook::stream gpolNonBonded,
::brook::stream gPolFixed,
::brook::stream bornRadii
);
void kPostGbsaForce2Debug (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
const float debugJAtom,
const float debugReport,
::brook::stream inBornForce2,
::brook::stream gPolFixed,
::brook::stream bornRadii,
::brook::stream debugStream1,
::brook::stream debugStream2,
::brook::stream debugStream3,
::brook::stream debugStream4
);
void kCalculateGbsaForce2_1_4 (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1
);
void kCalculateGbsaForce2_1_4Debug (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
const float debugJAtom,
const float debugReport,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream debugStream1,
::brook::stream debugStream2,
::brook::stream debugStream3,
::brook::stream debugStream4
);
void kCalculateGbsaForce2_1_4Excl (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excl,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1
);
void kCalculateGbsaForce2_1_4ExclDebug (
float natoms,
float dupfac,
float strwidth,
float fstrwidth,
const float debugIAtom,
const float debugJAtom,
const float debugReport,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream volume,
::brook::stream excl,
::brook::stream excludeBlockIndices,
::brook::stream inBornForce,
::brook::stream bornForce1,
::brook::stream debugStream1,
::brook::stream debugStream2,
::brook::stream debugStream3,
::brook::stream debugStream4
);
void kObcLoop2(
float numberOfAtoms,
float roundNatoms,
float duplicationFactor,
float streamWidth,
float fstreamWidth,
::brook::stream posq,
::brook::stream scaledAtomicRadii,
::brook::stream bornForceFactor,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream bornForce3,
::brook::stream bornForce4
);
void kCalculateBornRadii(
float numberOfAtoms,
float roundNatoms,
float duplicationFactor,
float streamWidth,
float fstreamWidth,
::brook::stream posq,
::brook::stream scaledAtomicRadii,
::brook::stream bornForce1
);
void kObcBornRadii(
float numberOfAtoms,
float streamWidth,
::brook::stream bornForce,
::brook::stream atomicRadii,
::brook::stream bornRadii,
::brook::stream obcChain
);
void kAceNonPolar(
float soluteDielectric,
float solventDielectric,
::brook::stream posq,
::brook::stream bornRadii,
::brook::stream vdwRadii,
::brook::stream bornForce
);
void kCalculateBornRadii(
float numberOfAtoms,
float roundNatoms,
float duplicationFactor,
float streamWidth,
float fstreamWidth,
::brook::stream posq,
::brook::stream atomicRadii,
::brook::stream scaledAtomicRadii,
::brook::stream bornForceFactor,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream bornForce3,
::brook::stream bornForce4
);
void kZeroFloat4( ::brook::stream bornForce );
void kSetValue4( float value, ::brook::stream bornForce );
void kSetValue3( float value, ::brook::stream bornForce );
void kSetValue2( float value, ::brook::stream bornForce );
void kSetValue1( float value, ::brook::stream bornForce );
void kCopyFloat3To4( ::brook::stream inForce, ::brook::stream outForce );
void kObcLoop2Cdlj4( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor,
float streamWidth, float fstreamWidth,
float jstreamWidth, float epsfac,
::brook::stream posq,
::brook::stream atomicRadii,
::brook::stream scaledAtomicRadii,
::brook::stream bornForceFactor,
::brook::stream isigeps,
::brook::stream sigma,
::brook::stream epsilon,
::brook::stream excl,
::brook::stream bornForce1,
::brook::stream bornForce2,
::brook::stream bornForce3,
::brook::stream bornForce4 );
#endif //__KGBSA_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
//Gather kernel for use with diferent angle, dihedral and improper functions
//
//For small systems, the overhead of calling the kernel is so high
//That we have to minimize the number of kernel calls. At the same
//time we don't want to waste reads, so I'm providing a bunch of kernels
//here that are unrolled to diferent extents. Use the appropriate one
//depending on how many inverse maps there are, rather than looping over
//the simple one.
//helper function to make the unrolling look better
kernel float3 do_gather( float strwidth, float4 invmap<>, float3 forces[][] ) {
float3 f;
float4 quotient, remainder;
float2 idx;
f = float3( 0.0f, 0.0f, 0.0f );
//Convert from linear to 2D index
// quotient = floor( invmap / strwidth );
quotient = round( ( invmap - fmod(invmap, strwidth))/strwidth );
remainder = invmap - quotient * strwidth;
//Add each force only if non-negative
if ( invmap.x >= 0.0f ) {
idx.y = quotient.x;
idx.x = remainder.x;
f = forces[ idx ];
}
if ( invmap.y >= 0.0f ) {
idx.y = quotient.y;
idx.x = remainder.y;
f += forces[ idx ];
}
if ( invmap.z >= 0.0f ) {
idx.y = quotient.z;
idx.x = remainder.z;
f += forces[ idx ];
}
if ( invmap.w >= 0.0f ) {
idx.y = quotient.w;
idx.x = remainder.w;
f += forces[ idx ];
}
return f;
}
//helper function to make the unrolling look better
kernel float3 do_gather_nobranch( float strwidth, float4 invmap<>, float3 forces[][] ) {
float3 f1, f2, f3, f4, f, z, t;
float4 quotient, remainder;
float2 idx;
float m1, m2, m3, m4;
z = float3( 0.0f, 0.0f, 0.0f );
f=z;
//Convert from linear to 2D index
// quotient = floor( invmap / strwidth );
quotient = round( ( invmap - fmod(invmap, strwidth))/strwidth );
remainder = invmap - quotient * strwidth;
m1 = invmap.x;
m2 = invmap.y;
m3 = invmap.z;
m4 = invmap.w;
//Add each force only if non-negative
idx.y = quotient.x;
idx.x = remainder.x;
f1 = forces[ idx ];
idx.y = quotient.y;
idx.x = remainder.y;
f2 = forces[ idx ];
idx.y = quotient.z;
idx.x = remainder.z;
f3 = forces[ idx ];
idx.y = quotient.w;
idx.x = remainder.w;
f4 = forces[ idx ];
f = (m1 >= 0.0f) ? f1 : z;
t = (m2 >= 0.0f) ? f2 : z;
f = f+t;
t = (m3 >= 0.0f) ? f3 : z;
f = f+t;
t = (m4 >= 0.0f) ? f4 : z;
f = f+t;
return f;
}
//Simple version, takes only one index stream
kernel void kinvmap_gather(
float strwidth, //stream width of the dihedral forces
float4 invmap<>, //indices into the dihedral forces
float3 forces[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap, forces );
}
//Takes two inverse maps
kernel void kinvmap_gather2(
float strwidth, //stream width of the dihedral forces
float4 invmap1<>, //indices into the dihedral forces
float4 invmap2<>, //indices into the dihedral forces
float3 forces[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap1, forces );
outforce += do_gather_nobranch( strwidth, invmap2, forces );
}
//Takes three inverse maps
kernel void kinvmap_gather3(
float strwidth, //stream width of the dihedral forces
float4 invmap1<>, //indices into the dihedral forces
float4 invmap2<>, //indices into the dihedral forces
float4 invmap3<>,
float3 forces[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap1, forces );
outforce += do_gather_nobranch( strwidth, invmap2, forces );
outforce += do_gather_nobranch( strwidth, invmap3, forces );
}
//Takes four inverse maps
kernel void kinvmap_gather4(
float strwidth, //stream width of the dihedral forces
float4 invmap1<>, //indices into the dihedral forces
float4 invmap2<>, //indices into the dihedral forces
float4 invmap3<>,
float4 invmap4<>,
float3 forces[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap1, forces );
outforce += do_gather_nobranch( strwidth, invmap2, forces );
outforce += do_gather_nobranch( strwidth, invmap3, forces );
outforce += do_gather_nobranch( strwidth, invmap4, forces );
}
//Takes five inverse maps
kernel void kinvmap_gather5(
float strwidth, //stream width of the dihedral forces
float4 invmap1<>, //indices into the dihedral forces
float4 invmap2<>, //indices into the dihedral forces
float4 invmap3<>,
float4 invmap4<>,
float4 invmap5<>,
float3 forces[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap1, forces );
outforce += do_gather_nobranch( strwidth, invmap2, forces );
outforce += do_gather_nobranch( strwidth, invmap3, forces );
outforce += do_gather_nobranch( strwidth, invmap4, forces );
outforce += do_gather_nobranch( strwidth, invmap5, forces );
}
//Takes six inverse maps - this is the last one!
kernel void kinvmap_gather6(
float strwidth, //stream width of the dihedral forces
float4 invmap1<>, //indices into the dihedral forces
float4 invmap2<>, //indices into the dihedral forces
float4 invmap3<>,
float4 invmap4<>,
float4 invmap5<>,
float4 invmap6<>,
float3 forces[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap1, forces );
outforce += do_gather_nobranch( strwidth, invmap2, forces );
outforce += do_gather_nobranch( strwidth, invmap3, forces );
outforce += do_gather_nobranch( strwidth, invmap4, forces );
outforce += do_gather_nobranch( strwidth, invmap5, forces );
outforce += do_gather_nobranch( strwidth, invmap6, forces );
}
//Takes three + four inverse maps
kernel void kinvmap_gather2_1(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather2_2(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather2_3(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float4 invmap4_3<>,
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather2_4(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float4 invmap4_3<>,
float4 invmap4_4<>,
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather2_5(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float4 invmap4_3<>,
float4 invmap4_4<>,
float4 invmap4_5<>,
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_5, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather3_1(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float4 invmap3_3<>,
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather3_2(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float4 invmap3_3<>,
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather3_3(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float4 invmap3_3<>,
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float4 invmap4_3<>,
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather3_4(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1<>, //indices into the dihedral forces
float4 invmap3_2<>, //indices into the dihedral forces
float4 invmap3_3<>,
float3 forces3[][], //dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float4 invmap4_3<>,
float4 invmap4_4<>,
float3 forces4[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
}
//Takes three + five inverse maps
kernel void kinvmap_gather3_5(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1[][], //indices into the dihedral forces
float4 invmap3_2[][], //indices into the dihedral forces
float4 invmap3_3[][],
float3 forces3[][], //dihedral forces
float4 invmap5_1[][], //indices into the dihedral forces
float4 invmap5_2[][], //indices into the dihedral forces
float4 invmap5_3[][],
float4 invmap5_4[][],
float4 invmap5_5[][],
float3 forces5[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
float2 idx = indexof(outforce);
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2[idx], forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_3[idx], forces3 );
outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_2[idx], forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_3[idx], forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_4[idx], forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_5[idx], forces5 );
}
//Takes one + one inverse maps
kernel void kinvmap_gather1_1(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1[][], //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap5_1[][], //indices into the dihedral forces
float3 forces5[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
float2 idx = indexof(outforce);
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 );
outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 );
}
//Takes five + two inverse maps
kernel void kinvmap_gather5_2(
float strwidth, //stream width of the dihedral forces
float4 invmap5_1<>, //indices into the dihedral forces
float4 invmap5_2<>, //indices into the dihedral forces
float4 invmap5_3<>,
float4 invmap5_4<>,
float4 invmap5_5<>,
float3 forces5[][], //dihedral forces
float4 invmap2_1<>, //indices into the dihedral forces
float4 invmap2_2<>, //indices into the dihedral forces
float3 forces2[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_5, forces5 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
}
//Takes five + two inverse maps
kernel void kinvmap_gather5_3(
float strwidth, //stream width of the dihedral forces
float4 invmap5_1<>, //indices into the dihedral forces
float4 invmap5_2<>, //indices into the dihedral forces
float4 invmap5_3<>,
float4 invmap5_4<>,
float4 invmap5_5<>,
float3 forces5[][], //dihedral forces
float4 invmap2_1<>, //indices into the dihedral forces
float4 invmap2_2<>, //indices into the dihedral forces
float4 invmap2_3<>, //indices into the dihedral forces
float3 forces2[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_5, forces5 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_3, forces2 );
}
//Takes five + two inverse maps
kernel void kinvmap_gather4_3(
float strwidth, //stream width of the dihedral forces
float4 invmap5_1<>, //indices into the dihedral forces
float4 invmap5_2<>, //indices into the dihedral forces
float4 invmap5_3<>,
float4 invmap5_4<>,
float3 forces5[][], //dihedral forces
float4 invmap2_1<>, //indices into the dihedral forces
float4 invmap2_2<>, //indices into the dihedral forces
float4 invmap2_3<>, //indices into the dihedral forces
float3 forces2[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_3, forces2 );
}
//Takes five + two inverse maps
kernel void kinvmap_gather1_2(
float strwidth, //stream width of the dihedral forces
float4 invmap5_1<>, //indices into the dihedral forces
float3 forces5[][], //dihedral forces
float4 invmap2_1<>, //indices into the dihedral forces
float4 invmap2_2<>, //indices into the dihedral forces
float3 forces2[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
}
//Takes five + two inverse maps
kernel void kinvmap_gather4_2(
float strwidth, //stream width of the dihedral forces
float4 invmap4_1<>, //indices into the dihedral forces
float4 invmap4_2<>, //indices into the dihedral forces
float4 invmap4_3<>,
float4 invmap4_4<>,
float3 forces4[][], //dihedral forces
float4 invmap2_1<>, //indices into the dihedral forces
float4 invmap2_2<>, //indices into the dihedral forces
float3 forces2[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
}
kernel float3 etch_force(
float fpos,
float strwidth,
float3 fi[][],
float3 fj[][],
float3 fk[][],
float3 fl[][]
)
{
float2 ind;
float _fpos;
_fpos = fpos;
if ( _fpos > 300000.0f ) {
_fpos = _fpos - 300000.0f;
//ind.y = floor( _fpos / strwidth );
ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
ind.x = _fpos - ind.y * strwidth;
return fl[ ind ];
}
else if ( _fpos > 200000.0f ) {
_fpos = _fpos - 200000.0f;
//ind.y = floor( _fpos / strwidth );
ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
ind.x = _fpos - ind.y * strwidth;
return fk[ ind ];
}
else if ( _fpos > 100000.0f ) {
_fpos = _fpos - 100000.0f;
//ind.y = floor( _fpos / strwidth );
ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
ind.x = _fpos - ind.y * strwidth;
return fj[ ind ];
}
else if ( _fpos >= -0.5f ) {
//ind.y = floor( _fpos / strwidth );
ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
ind.x = _fpos - ind.y * strwidth;
return fi[ ind ];
}
else
return 0.0f;
}
//For-loop version doesn't work
//Using a merged version of the above
kernel float2 linear_to_2D( float linind, float width )
{
float2 ind;
//ind.y = floor( linind / width );
ind.y = round( ( linind - fmod( linind, width))/width );
ind.x = linind - ind.y * width;
return ind;
}
//helper function to make the unrolling look better
kernel float3 do_gather_merged_single( float strwidth, float invmap,
float3 fi[][], float3 fj[][], float3 fk[][], float3 fl[][] ) {
float3 f;
float2 idx;
float _invmap;
float n;
_invmap = invmap;
n = floor( _invmap / 100000.0f );
_invmap -= n * 100000.0f;
idx = linear_to_2D( _invmap, strwidth );
if ( n > 2.5f ) {
f = fl[ idx ];
}
else if ( n > 1.5f ) {
f = fk[ idx ];
}
else if ( n > 0.5f ) {
f = fj[ idx ];
}
else if( n > -0.5f ) {
f = fi[ idx ];
}
return f;
}
kernel float3 do_gather_merged( float strwidth, float4 invmap,
float3 fi[][], float3 fj[][], float3 fk[][], float3 fl[][])
{
float3 f;
f = do_gather_merged_single( strwidth, invmap.x, fi, fj, fk, fl )
+ do_gather_merged_single( strwidth, invmap.y, fi, fj, fk, fl )
+ do_gather_merged_single( strwidth, invmap.z, fi, fj, fk, fl )
+ do_gather_merged_single( strwidth, invmap.w, fi, fj, fk, fl );
return f;
}
kernel void kinvmap_gather_merged5(
float natoms, //number of atoms
float strwidth, //stream width of out-of-order forces
float4 invmap0<>,
float4 invmap1<>,
float4 invmap2<>,
float4 invmap3<>,
float4 invmap4<>,
float3 fi[][], //i-forces
float3 fj[][], //j-forces
float3 fk[][], //k-forces
float3 fl[][], //l-forces
float3 inforce<>,
out float3 outforce<>
)
{
outforce = inforce;
outforce += do_gather_merged( strwidth, invmap0, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap1, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap2, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap3, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap4, fi, fj, fk, fl );
}
kernel void kinvmap_gather_merged9(
float natoms, //number of atoms
float strwidth, //stream width of out-of-order forces
float4 invmap0<>,
float4 invmap1<>,
float4 invmap2<>,
float4 invmap3<>,
float4 invmap4<>,
float4 invmap5<>,
float4 invmap6<>,
float4 invmap7<>,
float4 invmap8<>,
float3 fi[][], //i-forces
float3 fj[][], //j-forces
float3 fk[][], //k-forces
float3 fl[][], //l-forces
float3 inforce<>,
out float3 outforce<>
)
{
outforce = inforce;
outforce += do_gather_merged( strwidth, invmap0, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap1, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap2, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap3, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap4, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap5, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap6, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap7, fi, fj, fk, fl )
+ do_gather_merged( strwidth, invmap8, fi, fj, fk, fl );
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
void kinvmap_gather (const float strwidth,
::brook::stream invmap,
::brook::stream forces,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather2 (const float strwidth,
::brook::stream invmap1,
::brook::stream invmap2,
::brook::stream forces,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather3 (const float strwidth,
::brook::stream invmap1,
::brook::stream invmap2,
::brook::stream invmap3,
::brook::stream forces,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather4 (const float strwidth,
::brook::stream invmap1,
::brook::stream invmap2,
::brook::stream invmap3,
::brook::stream invmap4,
::brook::stream forces,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather5 (const float strwidth,
::brook::stream invmap1,
::brook::stream invmap2,
::brook::stream invmap3,
::brook::stream invmap4,
::brook::stream invmap5,
::brook::stream forces,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather6 (const float strwidth,
::brook::stream invmap1,
::brook::stream invmap2,
::brook::stream invmap3,
::brook::stream invmap4,
::brook::stream invmap5,
::brook::stream invmap6,
::brook::stream forces,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather2_2 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather2_1 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather2_3 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather1_2 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather2_4 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream invmap4_4,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather2_5 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream invmap4_4,
::brook::stream invmap4_5,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather3_2 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream invmap3_3,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather3_1 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream invmap3_3,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather3_3 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream invmap3_3,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather3_4 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream invmap3_3,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream invmap4_4,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather3_5 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream invmap3_3,
::brook::stream forces3,
::brook::stream invmap5_1,
::brook::stream invmap5_2,
::brook::stream invmap5_3,
::brook::stream invmap5_4,
::brook::stream invmap5_5,
::brook::stream forces5,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather1_1 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream forces3,
::brook::stream invmap5_1,
::brook::stream forces5,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather5_2 (const float strwidth,
::brook::stream invmap5_1,
::brook::stream invmap5_2,
::brook::stream invmap5_3,
::brook::stream invmap5_4,
::brook::stream invmap5_5,
::brook::stream forces5,
::brook::stream invmap2_1,
::brook::stream invmap2_2,
::brook::stream forces2,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather5_3 (const float strwidth,
::brook::stream invmap5_1,
::brook::stream invmap5_2,
::brook::stream invmap5_3,
::brook::stream invmap5_4,
::brook::stream invmap5_5,
::brook::stream forces5,
::brook::stream invmap2_1,
::brook::stream invmap2_2,
::brook::stream invmap2_3,
::brook::stream forces2,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather4_3 (const float strwidth,
::brook::stream invmap5_1,
::brook::stream invmap5_2,
::brook::stream invmap5_3,
::brook::stream invmap5_4,
::brook::stream forces5,
::brook::stream invmap2_1,
::brook::stream invmap2_2,
::brook::stream invmap2_3,
::brook::stream forces2,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather4_2 (const float strwidth,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream invmap4_4,
::brook::stream forces4,
::brook::stream invmap2_1,
::brook::stream invmap2_2,
::brook::stream forces2,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather_merged (const float natoms,
const float strwidth,
const float istrwidth,
const float fstrwidth,
::brook::stream nimap,
::brook::stream invmap,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather_merged9 (const float natoms,
const float strwidth,
::brook::stream invmap0,
::brook::stream invmap1,
::brook::stream invmap2,
::brook::stream invmap3,
::brook::stream invmap4,
::brook::stream invmap5,
::brook::stream invmap6,
::brook::stream invmap7,
::brook::stream invmap8,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather_merged5 (const float natoms,
const float strwidth,
::brook::stream invmap0,
::brook::stream invmap1,
::brook::stream invmap2,
::brook::stream invmap3,
::brook::stream invmap4,
::brook::stream fi,
::brook::stream fj,
::brook::stream fk,
::brook::stream fl,
::brook::stream inforce,
::brook::stream outforce);
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel void kMergeFloat(
float repfac,
float atomStrWidth,
float pstreamStrWidth,
float natoms,
float iUnroll,
iter float2 count<>,
float pstream1[][],
float pstream2[][],
out float outstream<> )
{
float linind;
float2 pindex;
float odd;
float i;
//convert to linear atom index
linind = count.x + count.y * atomStrWidth;
//If odd or even, we pick from diferent streams.
odd = linind - floor( linind / iUnroll ) * iUnroll;
//Now linear index is the index into partial_streams
linind = floor( linind / iUnroll );
outstream = 0.0f;
//If we have predicated conditionals, we should
//keep the conditional inside the loop
for ( i = 0; i < repfac; i+=1.0f ) {
pindex.y = floor( linind / pstreamStrWidth );
pindex.x = linind - pindex.y * pstreamStrWidth;
if ( odd > 0.5f ) { //is odd
outstream += pstream2[ pindex ];
} else {
outstream += pstream1[ pindex ];
}
linind += natoms/iUnroll;
}
}
kernel void kMergeFloat4(
float repfac,
float atomStrWidth,
float pstreamStrWidth,
float natoms,
float iUnroll,
float4 pstream1[][],
float4 pstream2[][],
out float4 outstream<> )
{
float linind;
float2 pindex;
float odd;
float i;
//convert to linear atom index
linind = (indexof outstream).x + ( (indexof outstream).y * atomStrWidth );
//If odd or even, we pick from diferent streams.
odd = linind - floor( linind / iUnroll ) * iUnroll;
//Now linear index is the index into partial_streams
linind = floor( linind / iUnroll );
outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
//If we have predicated conditionals, we should
//keep the conditional inside the loop
for ( i = 0.0f; i < repfac; i+= 1.0f ) {
pindex.y = floor( linind / pstreamStrWidth );
pindex.x = linind - pindex.y * pstreamStrWidth;
if ( odd > 0.5f ) { //is odd
outstream += pstream2[ pindex ];
} else {
outstream += pstream1[ pindex ];
}
linind += natoms/iUnroll;
}
}
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel void kMergeFloat4_4X(
float repfac,
float atomStrWidth,
float pstreamStrWidth,
float natoms,
float iUnroll,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
out float4 outstream<> )
{
float linind;
float2 pindex;
float odd;
float i;
//convert to linear atom index
linind = (indexof outstream).x + ( (indexof outstream).y * atomStrWidth );
//If odd or even, we pick from diferent streams.
odd = linind - floor( linind / iUnroll ) * iUnroll;
//Now linear index is the index into partial_streams
linind = floor( linind / iUnroll );
outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
//If we have predicated conditionals, we should
//keep the conditional inside the loop
for ( i = 0.0f; i < repfac; i+= 1.0f ) {
//pindex.y = floor( linind / pstreamStrWidth );
//pindex.x = linind - pindex.y * pstreamStrWidth;
pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth ); //bixia modify
pindex.x = linind - pindex.y * pstreamStrWidth;
outstream += float4( linind, odd, pindex.x, pindex.y );
/*
if ( odd < 0.5f ) { //is odd
outstream += pstream1[ pindex ];
} else if( odd < 1.5f ){
outstream += pstream2[ pindex ];
} else if( odd < 2.5f ){
outstream += pstream3[ pindex ];
} else {
outstream += pstream4[ pindex ];
}
*/
linind += natoms/iUnroll;
}
}
kernel void kMergeFloat4_4(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
out float4 outstream<> )
{
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
// given atom index find force indices and streams
pindex = indexof( outstream );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
for( i = 0.0f; i < repfac; i += 1.0f ){
// qIndex = floor( forceIndex/iUnroll );
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
// pindex.y = floor( qIndex/ pStreamWidth );
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
// pindex.x = qIndex - pindex.y*pStreamWidth + qOff;
pindex.x = qIndex - pindex.y*pStreamWidth;
// outstream += float4( forceIndex, qIndex, pindex.x, pindex.y );
if ( qOff < 0.5f ){
outstream += pstream1[ pindex ];
} else if( qOff < 1.5f ){
outstream += pstream2[ pindex ];
} else if( qOff < 2.5f ){
outstream += pstream3[ pindex ];
} else {
outstream += pstream4[ pindex ];
}
forceIndex += roundNatoms;
}
}
kernel void kSetValue4( float value, out float4 outstream<> ){
outstream = float4( value, value, value, value );
}
kernel void kSetValue3( float value, out float3 outstream<> ){
outstream = float3( value, value, value );
}
kernel void kSetValue2( float value, out float2 outstream<> ){
outstream = float2( value, value );
}
kernel void kSetValue1( float value, out float outstream<> ){
outstream = value;
}
kernel void kCheck( float natoms, float atomStrWidth, float pstreamStrWidth, float unroll, out float4 outstream<> )
{
float linind, forceIndex, atomIndex;
float2 pindex;
pindex = indexof( outstream );
forceIndex = unroll*(pindex.x + pindex.y*pstreamStrWidth);
atomIndex = fmod( forceIndex, natoms );
outstream = float4( pindex.x, pindex.y, forceIndex, atomIndex );
}
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel void kAddAndMergeFloat4(
float repfac,
float atomStrWidth,
float pstreamStrWidth,
float natoms,
float iUnroll,
float4 inStream<>,
float4 pstream1[][],
float4 pstream2[][],
out float4 outstream<> )
{
float linind;
float2 pindex;
float odd;
float i;
float floor_linind_iUnroll;
linind = (indexof outstream).x + (indexof outstream).y * atomStrWidth;
//If odd or even, we pick from diferent streams.
//odd = linind - floor( linind / iUnroll ) * iUnroll;
//Now linear index is the index into partial_streams
//linind = floor( linind / iUnroll );
floor_linind_iUnroll = round( (linind - fmod(linind, iUnroll))/iUnroll );
odd = linind - floor_linind_iUnroll * iUnroll;//bixia modify
linind = floor_linind_iUnroll; //bixia modify
outstream = inStream;
outstream.w = 0.0f;
//If we have predicated conditionals, we should
//keep the conditional inside the loop
for ( i = 0.0f; i < repfac; i+= 1.0f ) {
//pindex.y = floor( linind / pstreamStrWidth );
pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth ); //bixia modify
pindex.x = linind - pindex.y * pstreamStrWidth;
if ( odd > 0.5f ) { //is odd
outstream += pstream2[ pindex ];
} else {
outstream += pstream1[ pindex ];
}
linind += natoms/iUnroll;
}
}
kernel void kAddAndMergeFloat4_4(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float4 inStream<>,
float4 pstream1[][],
float4 pstream2[][],
float4 pstream3[][],
float4 pstream4[][],
out float4 outstream<> ){
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
// given atom index find force indices and streams
pindex = indexof( outstream );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
// add current forces in inStream to forces stored in pstreams
// the .w entry is Born sum values; it will be used to calculate the
// Born radii and obcChain term
outstream = inStream;
outstream.w = 0.0f;
//outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
// sum over j-loop 'duplications' by gathering from pstreams
for( i = 0.0f; i < repfac; i += 1.0f ){
// qIndex = floor( forceIndex/iUnroll );
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
// pindex.y = floor( qIndex/ pStreamWidth );
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
// pindex.x = qIndex - pindex.y*pStreamWidth + qOff;
pindex.x = qIndex - pindex.y*pStreamWidth;
if( qOff < 0.5f ){
outstream += pstream1[ pindex ];
} else if( qOff < 1.5f ){
outstream += pstream2[ pindex ];
} else if( qOff < 2.5f ){
outstream += pstream3[ pindex ];
} else {
outstream += pstream4[ pindex ];
}
forceIndex += roundNatoms;
}
}
/* Add forces from two streams */
kernel void kAddForces3_4( float conversion, float3 force1<>, float4 force2<>, out float3 outForce<> ){
outForce.xyz = force1 + conversion*force2.xyz;
}
/* Copy one stream to another */
kernel void kCopyFloat4( float4 inForce<>, out float4 outForce<> ){
outForce = inForce;
}
/* Copy one stream to another
* */
kernel void kCopyFloat3To4(
float3 inForce<>,
out float4 outForce<> ){
// ---------------------------------------------------------------------------------------
outForce.xyz = inForce;
outForce.w = 0.0f;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/* After forces above, we have the forces for even numbered particles
* in one stream, odd numbered particles in another.
* In each stream, the forces are in several parts depending on how
* many times we replicated the input stream.
*
* This kernel should not be a bottle neck, but if it turns out to
* be so, we can try some work arounds.
*
* To avoid an extra kernel to zero forces, this sets the forces
* rather than adding to it.
* */
kernel static void kmerge_partial_forces(
float repfac,
float atomStrWidth,
float pforceStrWidth,
float natoms,
float3 pforce1[][],
float3 pforce2[][],
out float3 force<> )
{
float linind;
float2 pindex;
float odd;
float i;
float2 adjustcount = (indexof force);
//convert to linear atom index
linind = adjustcount.x + adjustcount.y * atomStrWidth;
//If odd or even, we pick from diferent streams.
odd = linind - floor( linind / 2.0f ) * 2.0f;
//Now linear index is the index into partial_forces
linind = floor( linind / 2.0f );
force = float3( 0.0f, 0.0f, 0.0f );
//If we have predicated conditionals, we should
//keep the conditional inside the loop
for ( i = 0; i < repfac; i+=1.0f ) {
pindex.y = floor( linind / pforceStrWidth );
pindex.x = linind - pindex.y * pforceStrWidth;
//pindex.x = fmod( linind, pforceStrWidth );
if ( odd > 0.5f ) { //is odd
force += pforce2[ pindex ];
}
else {
force += pforce1[ pindex ];
}
linind += natoms/2.0f;
}
}
kernel void kMergeFloat3_4(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float3 pstream1[][],
float3 pstream2[][],
float3 pstream3[][],
float3 pstream4[][],
out float3 outstream<> )
{
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
// given atom index find force indices and streams
pindex = indexof( outstream );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
outstream = float3( 0.0f, 0.0f, 0.0f );
for( i = 0.0f; i < repfac; i += 1.0f ){
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
pindex.x = qIndex - pindex.y*pStreamWidth;
if ( qOff < 0.5f ){
outstream += pstream1[ pindex ];
} else if( qOff < 1.5f ){
outstream += pstream2[ pindex ];
} else if( qOff < 2.5f ){
outstream += pstream3[ pindex ];
} else {
outstream += pstream4[ pindex ];
}
forceIndex += roundNatoms;
}
}
kernel void kMergeFloat3_4_nobranch(
float repfac,
float atomStreamWidth,
float pStreamWidth,
float natoms,
float roundNatoms,
float iUnroll,
float3 pstream1[][],
float3 pstream2[][],
float3 pstream3[][],
float3 pstream4[][],
out float3 outstream<> )
{
float atomIndex, forceIndex, qIndex, qOff;
float2 pindex;
float i;
float3 o1,o2,o3,o4;
float3 tmp;
// given atom index find force indices and streams
pindex = indexof( outstream );
atomIndex = pindex.x + pindex.y*atomStreamWidth;
forceIndex = atomIndex;
outstream = float3( 0.0f, 0.0f, 0.0f );
for( i = 0.0f; i < repfac; i += 1.0f ){
qIndex = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
qOff = forceIndex - iUnroll*qIndex;
pindex.y = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
pindex.x = qIndex - pindex.y*pStreamWidth;
o1 = pstream1[ pindex ];
o2 = pstream2[ pindex ];
o3 = pstream3[ pindex ];
o4 = pstream4[ pindex ];
tmp = qOff < 0.5f ? o1 : o2;
tmp = qOff < 1.5f ? tmp : o3;
tmp = qOff < 2.5f ? tmp : o4;
outstream += tmp;
forceIndex += roundNatoms;
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/*
* This kernel constrains bonds involving hydrogen using an unrolled
* shake implementation. We assume that a heavy atom can have at most
* 3 hydrogens attached. The hydrogens are expected to be identical
*
* For atoms that have ewer than 3 hydrogens
*
* Also we don't check the constraints!
*
* We do a fixed number of iterations without checking for convergence
* to avoid stalling
* *************************************************************/
/* Fix for precision of terms first order in dt
* To be used with corresponding update kernels
* The input posqp is now changes to posq rather than
* posq+changes
* */
kernel void
kshakeh_fix1(
float maxIterations, //number of iterations
float strwidth, //stream width of posq
float inputTolerance, //tolerance
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float4 params<>, // (1/m0, mu/2, blensq, 1/m1 )
out float3 cposq0<>, //constrained position for heavy atom
out float3 cposq1<>, //ditto for h1
out float3 cposq2<>, //ditto for h2
out float3 cposq3<> //ditto for h3
) {
float2 ai, aj1, aj2, aj3; //2d indices, can be precalc.
float i; //iteration count
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor, tolerance, converged;
float mask2, mask3;
float diff;
float rij1sq, rij2sq, rij3sq;
float ld1, ld2, ld3;
// ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
// aj1.y = floor( atoms.y / strwidth );
aj1.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj1.x = atoms.y - aj1.y * strwidth;
//If further hydrogens are absent,
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
if( atoms.z > -0.5f ){
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
} else {
aj2 = aj1;
mask2 = 0.0f;
}
if( atoms.w > -0.5f ){
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
} else {
aj3 = aj1;
mask3 = 0.0f;
}
cposq0 = posq[ai];
cposq1 = posq[aj1];
cposq2 = posq[aj2];
cposq3 = posq[aj3];
xi = cposq0;
xj1 = cposq1;
xj2 = cposq2;
xj3 = cposq3;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
i = 0.0f;
converged = 1.0f;
tolerance = 2.0f*inputTolerance;
while( i < maxIterations && converged > 0.0f ){
//First hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
acor = ( ld1 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
diff = abs( ld1 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
acor = (diff < 1.0f) ? 0.0f : acor;
converged = abs( acor );
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * params.w;
//Second hydrogen
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij );
diff = abs( ld2 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
acor = mask2 * ( ld2 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
acor = (diff < 1.0f) ? 0.0f : acor;
converged += abs( acor );
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * params.w;
//Third hydrogen
rpij = xpi - xpj3;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
acor = mask3 * ( ld3 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
acor = (diff < 1.0f) ? 0.0f : acor;
converged += abs( acor );
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * params.w;
i += 1.0f;
}
//Output modified delta's
cposq0 = xpi;
cposq1 = xpj1;
cposq2 = xpj2;
cposq3 = xpj3;
}
kernel void kshakeh_update1_fix1(
float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map
float3 posqp<>, //deltas from sd2
float3 cposq0[][], //constrained delta for heavy atom
float3 cposq1[][], //ditto for h1
float3 cposq2[][], //ditto for h2
float3 cposq3[][], //ditto for h3
out float3 oposq<> //updated deltas
){
float2 atom;
atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
atom.x = invmap.x - atom.y * strwidth;
if( invmap.y < 0 ){
oposq = posqp;
} else if( invmap.y < 0.5f ){
oposq = cposq0[ atom ];
} else if( invmap.y < 1.5f ){
oposq = cposq1[ atom ];
} else if( invmap.y < 2.5f ){
oposq = cposq2[ atom ];
} else if( invmap.y < 3.5f ){
oposq = cposq3[ atom ];
}
}
kernel void kshakeh_update2_fix1(
float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions
float3 posqp<>, //deltas from sd2
float3 cposq0[][], //constrained delta for heavy atom
float3 cposq1[][], //ditto for h1
float3 cposq2[][], //ditto for h2
float3 cposq3[][], //ditto for h3
out float3 oposq<> //updated deltas
){
float2 atom;
atom.y = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
atom.x = invmap.x - atom.y * strwidth;
if ( invmap.y < 0 ){
oposq = posqp;
} else if ( invmap.y < 0.5f ){
oposq = cposq0[ atom ];
} else if ( invmap.y < 1.5f){
oposq = cposq1[ atom ];
} else if ( invmap.y < 2.5f ){
oposq = cposq2[ atom ];
} else if ( invmap.y < 3.5f ){
oposq = cposq3[ atom ];
}
oposq += posq;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
void kshakeh_fix1 (
const float maxIterations,
const float strwidth,
const float tolerance,
::brook::stream atoms,
::brook::stream posq,
::brook::stream posqp,
::brook::stream params,
::brook::stream cposq0,
::brook::stream cposq1,
::brook::stream cposq2,
::brook::stream cposq3);
void kshakeh_update1_fix1 (
const float strwidth,
::brook::stream invmap,
::brook::stream posqp,
::brook::stream cposq0,
::brook::stream cposq1,
::brook::stream cposq2,
::brook::stream cposq3,
::brook::stream oposq);
void kshakeh_update2_fix1 (
const float strwidth,
::brook::stream invmap,
::brook::stream posq,
::brook::stream posqp,
::brook::stream cposq0,
::brook::stream cposq1,
::brook::stream cposq2,
::brook::stream cposq3,
::brook::stream oposq);
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/*
* Brownian dynamics integration
*
* @param xstrwidth atom stream width
* @param gstrwidth Gaussian stream width
* @param goffset Gaussian offset into stream
* @param forceScale force scale factor
* @param noiseAmplitude noise amplitude
* @param fgauss random numbers
* @param pos atom positions
* @param force force
* @param posp delta positions
*
**/
kernel void kintegrate_bd( float xstrwidth, float gstrwidth, float goffset,
float forceScale, float noiseAmplitude, float3 fgauss[][],
float3 pos<>, float3 force<>, out float3 posp<> ){
// ---------------------------------------------------------------------------------------
float3 fg1;
float linind_gauss; //linear index into the random numbers
float2 igauss; //2d index
// ---------------------------------------------------------------------------------------
igauss = indexof( pos );
linind_gauss = igauss.y * xstrwidth + igauss.x;
linind_gauss = linind_gauss + goffset;
igauss.y = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
igauss.x = linind_gauss - igauss.y * gstrwidth;
fg1 = fgauss[ igauss ];
posp = forceScale*force + noiseAmplitude*fg1;
}
/*
* Brownian dynamics update
*
* @param velocityScale velocity scale
* @param posp atom positions
* @param posIn atom positions
* @param velocity velocity
* @param posp delta positions
*
rfac = sqrt(2.0*BOLTZ*temp/(fr*dt));
invfr = 1.0/fr;
vn = invfr*f[n][d] + rfac*fgauss(&jran);
v[n][d] = vn;
xprime[n][d] = x[n][d]+vn*dt;
**/
kernel void kupdate_bd( float velocityScale,
float3 posp<>, float3 posIn<>, out float3 velocity<>, out float3 posOut<> ){
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
velocity = velocityScale*( posp - posIn );
posOut = posp;
}
/*
* Brownian dynamics update (no Shake)
*
* @param velocityScale velocity scale
* @param posp atom positions
* @param posIn atom positions
* @param velocity velocity
* @param posp delta positions
*
**/
kernel void kupdate_bd2( float velocityScale,
float3 posp<>, float3 posIn<>, out float3 velocity<>, out float3 posOut<> ){
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
velocity = velocityScale*posp;
posOut = posIn + posp;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs, Mike Houston *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/*
* Brownian dynamics integration
*
* @param xstrwidth atom stream width
* @param gstrwidth Gaussian stream width
* @param goffset Gaussian offset into stream
* @param forceScale force scale factor
* @param noiseAmplitude noise amplitude
* @param fgauss random numbers
* @param pos atom positions
* @param force force
* @param posp delta positions
*
**/
void kintegrate_bd( const float xstrwidth, const float gstrwidth, const float goffset,
const float forceScale, const float noiseAmplitude,
::brook::stream fgauss, ::brook::stream posq,
::brook::stream force, ::brook::stream posp );
/*
* Brownian dynamics update
*
* @param velocityScale velocity scale
* @param posp atom positions
* @param posIn atom positions
* @param velocity velocity
* @param posp delta positions
*
**/
void kupdate_bd( const float velocityScale, ::brook::stream posp, ::brook::stream posIn,
::brook::stream velocity, ::brook::stream posOut );
/*
* Brownian dynamics update
*
* @param velocityScale velocity scale
* @param posp atom positions
* @param velocity velocity
* @param posp delta positions
*
**/
void kupdate_bd2( const float velocityScale, ::brook::stream posp,
::brook::stream posIn, ::brook::stream velocity, ::brook::stream posOut );
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment