Commit b05c6aaa authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Added code to set duplication factor based on min suggested threads

Added more ik,jl gathers for bonded forces
Redid shared #define setting in CMakeLists.txt
parent 5bd5dcbb
......@@ -161,13 +161,14 @@ IF(LOG)
ENDIF(LOG)
# ----------------------------------------------------------------------------
# BROOK_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
# Shared library target built from the Brook-generated and regular sources.
ADD_LIBRARY(${SHARED_BROOK_TARGET} SHARED ${BROOK_CPP_FILES} ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} )
# Define OPENMM_BUILDING_SHARED_LIBRARY only while compiling the shared target.
SET_TARGET_PROPERTIES(${SHARED_BROOK_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")
# Static library target is optional; it is built from the same source lists
# and compiled with OPENMM_USE_STATIC_LIBRARIES defined instead.
IF(INCLUDE_BROOK_STATIC)
ADD_LIBRARY(${STATIC_BROOK_TARGET} STATIC ${BROOK_CPP_FILES} ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} )
SET_TARGET_PROPERTIES(${STATIC_BROOK_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_USE_STATIC_LIBRARIES")
ENDIF(INCLUDE_BROOK_STATIC)
# ----------------------------------------------------------------------------
......
......@@ -206,7 +206,9 @@ class OPENMM_EXPORT BrookPlatform : public Platform {
* @return true if BrookPlatform supports double precision
*/
bool supportsDoublePrecision( void ) const;
// w/ FAH bool is redefined as int; causes problem w/ Platform::bool supportsDoublePrecision() const;
#define bool bool
bool supportsDoublePrecision( void ) const;
/**
* Return default Brook stream factory
......@@ -288,6 +290,18 @@ class OPENMM_EXPORT BrookPlatform : public Platform {
void contextDestroyed( OpenMMContextImpl& context ) const;
/**
* Get minSuggestedThreads
*/
int getMinSuggestedThreads( void ) const;
/**
* Get duplicationFactor
*/
int getDuplicationFactor( int numberOfParticles ) const;
private:
// log file reference
......@@ -310,6 +324,10 @@ class OPENMM_EXPORT BrookPlatform : public Platform {
std::string _runtime;
// min suggested threads
int _minSuggestedThreads;
/**
* Initialize kernel factory
*
......
......@@ -1955,6 +1955,21 @@ void BrookBonded::computeForces( BrookStreamImpl& positionStream, BrookStreamImp
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 2 && getInverseMapStreamCount( K_Stream ) == 3 ){
kinvmap_gather2_3( width,
inverseStreamMaps[I_Stream][0]->getBrookStream(),
inverseStreamMaps[I_Stream][1]->getBrookStream(),
bondedForceStreams[I_Stream]->getBrookStream(),
inverseStreamMaps[K_Stream][0]->getBrookStream(),
inverseStreamMaps[K_Stream][1]->getBrookStream(),
inverseStreamMaps[K_Stream][2]->getBrookStream(),
bondedForceStreams[K_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 3 && getInverseMapStreamCount( K_Stream ) == 4 ){
kinvmap_gather3_4( width,
......@@ -1987,6 +2002,66 @@ void BrookBonded::computeForces( BrookStreamImpl& positionStream, BrookStreamImp
bondedForceStreams[K_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 3 && getInverseMapStreamCount( K_Stream ) == 2 ){
kinvmap_gather3_2( width,
inverseStreamMaps[I_Stream][0]->getBrookStream(),
inverseStreamMaps[I_Stream][1]->getBrookStream(),
inverseStreamMaps[I_Stream][2]->getBrookStream(),
bondedForceStreams[I_Stream]->getBrookStream(),
inverseStreamMaps[K_Stream][0]->getBrookStream(),
inverseStreamMaps[K_Stream][1]->getBrookStream(),
bondedForceStreams[K_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 3 && getInverseMapStreamCount( K_Stream ) == 1 ){
kinvmap_gather3_1( width,
inverseStreamMaps[I_Stream][0]->getBrookStream(),
inverseStreamMaps[I_Stream][1]->getBrookStream(),
inverseStreamMaps[I_Stream][2]->getBrookStream(),
bondedForceStreams[I_Stream]->getBrookStream(),
inverseStreamMaps[K_Stream][0]->getBrookStream(),
bondedForceStreams[K_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 2 && getInverseMapStreamCount( K_Stream ) == 4 ){
kinvmap_gather2_4( width,
inverseStreamMaps[I_Stream][0]->getBrookStream(),
inverseStreamMaps[I_Stream][1]->getBrookStream(),
bondedForceStreams[I_Stream]->getBrookStream(),
inverseStreamMaps[K_Stream][0]->getBrookStream(),
inverseStreamMaps[K_Stream][1]->getBrookStream(),
inverseStreamMaps[K_Stream][2]->getBrookStream(),
inverseStreamMaps[K_Stream][3]->getBrookStream(),
bondedForceStreams[K_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 2 && getInverseMapStreamCount( K_Stream ) == 5 ){
kinvmap_gather2_5( width,
inverseStreamMaps[I_Stream][0]->getBrookStream(),
inverseStreamMaps[I_Stream][1]->getBrookStream(),
bondedForceStreams[I_Stream]->getBrookStream(),
inverseStreamMaps[K_Stream][0]->getBrookStream(),
inverseStreamMaps[K_Stream][1]->getBrookStream(),
inverseStreamMaps[K_Stream][2]->getBrookStream(),
inverseStreamMaps[K_Stream][3]->getBrookStream(),
inverseStreamMaps[K_Stream][4]->getBrookStream(),
bondedForceStreams[K_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 2 && getInverseMapStreamCount( K_Stream ) == 1 ){
kinvmap_gather2_1( width,
inverseStreamMaps[I_Stream][0]->getBrookStream(),
inverseStreamMaps[I_Stream][1]->getBrookStream(),
bondedForceStreams[I_Stream]->getBrookStream(),
inverseStreamMaps[K_Stream][0]->getBrookStream(),
bondedForceStreams[K_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( I_Stream ) == 1 && getInverseMapStreamCount( K_Stream ) == 1 ){
kinvmap_gather1_1( width,
......@@ -2061,13 +2136,88 @@ void BrookBonded::computeForces( BrookStreamImpl& positionStream, BrookStreamImp
bondedForceStreams[L_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( J_Stream ) == 4 && getInverseMapStreamCount( L_Stream ) == 2 ){
kinvmap_gather4_2( width,
inverseStreamMaps[J_Stream][0]->getBrookStream(),
inverseStreamMaps[J_Stream][1]->getBrookStream(),
inverseStreamMaps[J_Stream][2]->getBrookStream(),
inverseStreamMaps[J_Stream][3]->getBrookStream(),
bondedForceStreams[J_Stream]->getBrookStream(),
inverseStreamMaps[L_Stream][0]->getBrookStream(),
inverseStreamMaps[L_Stream][1]->getBrookStream(),
bondedForceStreams[L_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( J_Stream ) == 3 && getInverseMapStreamCount( L_Stream ) == 2 ){
kinvmap_gather3_2( width,
inverseStreamMaps[J_Stream][0]->getBrookStream(),
inverseStreamMaps[J_Stream][1]->getBrookStream(),
inverseStreamMaps[J_Stream][2]->getBrookStream(),
bondedForceStreams[J_Stream]->getBrookStream(),
inverseStreamMaps[L_Stream][0]->getBrookStream(),
inverseStreamMaps[L_Stream][1]->getBrookStream(),
bondedForceStreams[L_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( J_Stream ) == 2 && getInverseMapStreamCount( L_Stream ) == 2 ){
kinvmap_gather2_2( width,
inverseStreamMaps[J_Stream][0]->getBrookStream(),
inverseStreamMaps[J_Stream][1]->getBrookStream(),
bondedForceStreams[J_Stream]->getBrookStream(),
inverseStreamMaps[L_Stream][0]->getBrookStream(),
inverseStreamMaps[L_Stream][1]->getBrookStream(),
bondedForceStreams[L_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( J_Stream ) == 1 && getInverseMapStreamCount( L_Stream ) == 2 ){
kinvmap_gather1_2( width,
inverseStreamMaps[J_Stream][0]->getBrookStream(),
bondedForceStreams[J_Stream]->getBrookStream(),
inverseStreamMaps[L_Stream][0]->getBrookStream(),
inverseStreamMaps[L_Stream][1]->getBrookStream(),
bondedForceStreams[L_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( J_Stream ) == 5 && getInverseMapStreamCount( L_Stream ) == 3 ){
kinvmap_gather5_3( width,
inverseStreamMaps[J_Stream][0]->getBrookStream(),
inverseStreamMaps[J_Stream][1]->getBrookStream(),
inverseStreamMaps[J_Stream][2]->getBrookStream(),
inverseStreamMaps[J_Stream][3]->getBrookStream(),
inverseStreamMaps[J_Stream][4]->getBrookStream(),
bondedForceStreams[J_Stream]->getBrookStream(),
inverseStreamMaps[L_Stream][0]->getBrookStream(),
inverseStreamMaps[L_Stream][1]->getBrookStream(),
inverseStreamMaps[L_Stream][2]->getBrookStream(),
bondedForceStreams[L_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else if( getInverseMapStreamCount( J_Stream ) == 4 && getInverseMapStreamCount( L_Stream ) == 3 ){
kinvmap_gather4_3( width,
inverseStreamMaps[J_Stream][0]->getBrookStream(),
inverseStreamMaps[J_Stream][1]->getBrookStream(),
inverseStreamMaps[J_Stream][2]->getBrookStream(),
inverseStreamMaps[J_Stream][3]->getBrookStream(),
bondedForceStreams[J_Stream]->getBrookStream(),
inverseStreamMaps[L_Stream][0]->getBrookStream(),
inverseStreamMaps[L_Stream][1]->getBrookStream(),
inverseStreamMaps[L_Stream][2]->getBrookStream(),
bondedForceStreams[L_Stream]->getBrookStream(),
forceStream.getBrookStream(), forceStream.getBrookStream() );
} else {
// case not handled -- throw an exception
FILE* log = getLog() ? getLog() : stderr;
if( ErrorMessages++ < MaxErrorMessages && getInverseMapStreamCount( J_Stream ) > 0 && getInverseMapStreamCount( L_Stream ) > 0 ){
(void) fprintf( log, "%s case: J-map=%d L-map=%d -- not handled.\n",
(void) fprintf( log, "%s case: J-map=%d L-map=%d -- not handled -- contact OpenMM developers.\n",
methodName.c_str(), getInverseMapStreamCount( J_Stream ),
getInverseMapStreamCount( L_Stream ) );
(void) fflush( log );
......
......@@ -188,6 +188,20 @@ float BrookGbsa::getDielectricOffset( void ) const {
return (float) _dielectricOffset;
}
/**
 * Store the duplication factor used by this object
 *
 * @param duplicationFactor  value assigned to _duplicationFactor
 *
 * @return DefaultReturnValue
 *
 */
int BrookGbsa::setDuplicationFactor(int duplicationFactor)
{
   // no validation is done here; the value is stored as given
   _duplicationFactor = duplicationFactor;

   return DefaultReturnValue;
}
/**
* Set outer loop unroll
*
......
......@@ -83,6 +83,17 @@ class BrookGbsa : public BrookCommon {
int getDuplicationFactor( void ) const;
/**
* Set duplication factor
*
* @param duplicationFactor duplication factor
*
* @return DefaultReturnValue
*
*/
int setDuplicationFactor( int duplicationFactor );
/**
* Get particle ceiling parameter
*
......
......@@ -193,6 +193,20 @@ int BrookNonBonded::getDuplicationFactor( void ) const {
return _duplicationFactor;
}
/**
 * Store the duplication factor used by this object
 *
 * @param duplicationFactor  value assigned to _duplicationFactor
 *
 * @return DefaultReturnValue
 *
 */
int BrookNonBonded::setDuplicationFactor(int duplicationFactor)
{
   // no validation is done here; the value is stored as given
   _duplicationFactor = duplicationFactor;

   return DefaultReturnValue;
}
/**
* Get j-stream width
*
......
......@@ -323,6 +323,17 @@ class BrookNonBonded : public BrookCommon {
int setup( int numberOfParticles, const std::vector<std::vector<double> >& nonbondedParameters,
const std::vector<std::set<int> >& exclusions, const Platform& platform );
/**
* Set duplication factor
*
* @param duplicationFactor duplication factor
*
* @return DefaultReturnValue
*
*/
int setDuplicationFactor( int duplicationFactor );
/*
* Get contents of object
*
......
......@@ -206,6 +206,7 @@ BrookPlatform::BrookPlatform( ){
// ---------------------------------------------------------------------------------------
_particleStreamWidth = DefaultParticleStreamWidth;
_minSuggestedThreads = -1;
_log = NULL;
//_log = stderr;
......@@ -330,15 +331,36 @@ void BrookPlatform::_setBrookRuntime( const std::string& runtime ){
throw OpenMMException( message.str() );
}
// let user know runtime setting
if( 1 ){
//When compiling with cygwin/cl combo, doesn't
//always work from the environment, so I'm
//hardcoding it here. An alternative might be to getenv() in
//the gromacs code and pass it here. The cygwin getenv() hopefully
//will work more deterministically.
char* info_string = NULL;
int minSuggestedThreads;
brook::initialize( _runtime.c_str(), NULL, &info_string, &minSuggestedThreads );
FILE* log = getLog() ? getLog() : stderr;
(void) fprintf( log, "Using runtime %s; initializing Brook\n", _runtime.c_str() );
fprintf( log, "############\n\nBrook info_string:\n%s\n############\n", info_string );
(void) fflush( log );
if( minSuggestedThreads > 0 ){
_minSuggestedThreads = minSuggestedThreads;
}
} else {
FILE* log = getLog() ? getLog() : stderr;
(void) fprintf( log, "%s Brook initializing to runtime=<%s>\n", methodName.c_str(), _runtime.c_str() );
(void) fflush( log );
}
brook::initialize( _runtime.c_str(), NULL );
brook::initialize( _runtime.c_str(), NULL );
}
}
......@@ -352,6 +374,46 @@ std::string BrookPlatform::getName() const {
return "Brook";
}
/**
 * Get the duplication factor to use for a system with the given number of particles
 *
 * When _minSuggestedThreads is positive, the factor is derived from
 * (4*_minSuggestedThreads)/numberOfParticles: the result is 1 if that ratio is
 * at most 1, otherwise the ratio rounded up to a multiple of 4.
 * When _minSuggestedThreads is not positive, or numberOfParticles is not
 * positive, the default value (4) is returned.
 *
 * @param numberOfParticles number of particles
 *
 * @return duplication factor (1 or a positive multiple of 4)
 */
int BrookPlatform::getDuplicationFactor( int numberOfParticles ) const {

// ---------------------------------------------------------------------------------------

// static const std::string methodName = "BrookPlatform::getDuplicationFactor";

// ---------------------------------------------------------------------------------------

   // default value
   int duplicationFactor = 4;

   // set only if _minSuggestedThreads is available from board;
   // a non-positive particle count would lead to a division by zero (and an
   // undefined float-to-int conversion of infinity) below, so the default
   // is kept in that case as well
   if( _minSuggestedThreads > 0 && numberOfParticles > 0 ){

      float threads = static_cast<float>( _minSuggestedThreads );
      float numP    = static_cast<float>( numberOfParticles );
      float iUnroll = 4.0f;
      float factor  = (threads*iUnroll)/numP;

      // if the quotient, multiplied back, falls short of the requested
      // thread count, bump the factor by one
      if( (factor*numP) < (threads*iUnroll) ){
         factor += 1.0f;
      }

      if( factor <= 1.0f ){
         duplicationFactor = 1;
      } else {
         // round up to the next multiple of 4
         duplicationFactor  = static_cast<int>( ceil( factor*0.25f ) );
         duplicationFactor *= 4;
      }
   }
   return duplicationFactor;
}
/**
* Return platform speed
*
......@@ -473,8 +535,10 @@ void BrookPlatform::contextCreated( OpenMMContextImpl& context ) const {
// ---------------------------------------------------------------------------------------
OpenMMBrookInterface* openMMBrookInterface = new OpenMMBrookInterface( getParticleStreamWidth() );
int particles = context.getSystem().getNumParticles();
OpenMMBrookInterface* openMMBrookInterface = new OpenMMBrookInterface( getParticleStreamWidth(), getDuplicationFactor( particles ) );
// openMMBrookInterface->setLog( stderr );
context.setPlatformData( openMMBrookInterface );
}
......
......@@ -50,7 +50,7 @@ using namespace std;
*
*/
OpenMMBrookInterface::OpenMMBrookInterface( int streamWidth ) : _particleStreamWidth(streamWidth){
OpenMMBrookInterface::OpenMMBrookInterface( int streamWidth, int duplicationFactor ) : _particleStreamWidth(streamWidth){
// ---------------------------------------------------------------------------------------
......@@ -74,6 +74,12 @@ OpenMMBrookInterface::OpenMMBrookInterface( int streamWidth ) : _particleStreamW
for( int ii = 0; ii < LastBondForce; ii++ ){
_bondParameters[ii] = NULL;
}
if( duplicationFactor < 1 ){
duplicationFactor = 4;
}
_brookNonBonded.setDuplicationFactor( duplicationFactor );
_brookGbsa.setDuplicationFactor( duplicationFactor );
}
/**
......
......@@ -51,7 +51,7 @@ class OpenMMBrookInterface {
public:
OpenMMBrookInterface( int streamWidth );
OpenMMBrookInterface( int streamWidth, int duplicationFactor );
~OpenMMBrookInterface();
......
......@@ -249,6 +249,27 @@ kernel void kinvmap_gather6(
}
//Takes two + one inverse maps: adds the gathered forces to inforce
//NOTE(review): the digits in the parameter names (3/4) do not match the map
//counts of this kernel -- presumably carried over from kinvmap_gather3_4
kernel void kinvmap_gather2_1(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap3_1<>, //first inverse map into forces3
float4 invmap3_2<>, //second inverse map into forces3
float3 forces3[][], //first force stream (gathered via invmap3_*)
float4 invmap4_1<>, //inverse map into forces4
float3 forces4[][], //second force stream (gathered via invmap4_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather2_2(
float strwidth, //stream width of the dihedral forces
......@@ -272,6 +293,132 @@ kernel void kinvmap_gather2_2(
}
//Takes two + three inverse maps: adds the gathered forces to inforce
//NOTE(review): the digits in the parameter names (3/4) do not match the map
//counts of this kernel -- presumably carried over from kinvmap_gather3_4
kernel void kinvmap_gather2_3(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap3_1<>, //first inverse map into forces3
float4 invmap3_2<>, //second inverse map into forces3
float3 forces3[][], //first force stream (gathered via invmap3_*)
float4 invmap4_1<>, //first inverse map into forces4
float4 invmap4_2<>, //second inverse map into forces4
float4 invmap4_3<>, //third inverse map into forces4
float3 forces4[][], //second force stream (gathered via invmap4_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
}
//Takes two + four inverse maps: adds the gathered forces to inforce
//NOTE(review): the "3" in invmap3_*/forces3 does not match the two maps used
//for that stream -- presumably carried over from kinvmap_gather3_4
kernel void kinvmap_gather2_4(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap3_1<>, //first inverse map into forces3
float4 invmap3_2<>, //second inverse map into forces3
float3 forces3[][], //first force stream (gathered via invmap3_*)
float4 invmap4_1<>, //first inverse map into forces4
float4 invmap4_2<>, //second inverse map into forces4
float4 invmap4_3<>, //third inverse map into forces4
float4 invmap4_4<>, //fourth inverse map into forces4
float3 forces4[][], //second force stream (gathered via invmap4_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
}
//Takes two + five inverse maps: adds the gathered forces to inforce
//NOTE(review): the digits in the parameter names (3/4) do not match the map
//counts of this kernel -- presumably carried over from kinvmap_gather3_4
kernel void kinvmap_gather2_5(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap3_1<>, //first inverse map into forces3
float4 invmap3_2<>, //second inverse map into forces3
float3 forces3[][], //first force stream (gathered via invmap3_*)
float4 invmap4_1<>, //first inverse map into forces4
float4 invmap4_2<>, //second inverse map into forces4
float4 invmap4_3<>, //third inverse map into forces4
float4 invmap4_4<>, //fourth inverse map into forces4
float4 invmap4_5<>, //fifth inverse map into forces4
float3 forces4[][], //second force stream (gathered via invmap4_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_5, forces4 );
}
//Takes three + one inverse maps: adds the gathered forces to inforce
//NOTE(review): the "4" in invmap4_1/forces4 does not match the single map
//used for that stream -- presumably carried over from kinvmap_gather3_4
kernel void kinvmap_gather3_1(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap3_1<>, //first inverse map into forces3
float4 invmap3_2<>, //second inverse map into forces3
float4 invmap3_3<>, //third inverse map into forces3
float3 forces3[][], //first force stream (gathered via invmap3_*)
float4 invmap4_1<>, //inverse map into forces4
float3 forces4[][], //second force stream (gathered via invmap4_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
}
//Takes three + two inverse maps: adds the gathered forces to inforce
//NOTE(review): the "4" in invmap4_*/forces4 does not match the two maps
//used for that stream -- presumably carried over from kinvmap_gather3_4
kernel void kinvmap_gather3_2(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap3_1<>, //first inverse map into forces3
float4 invmap3_2<>, //second inverse map into forces3
float4 invmap3_3<>, //third inverse map into forces3
float3 forces3[][], //first force stream (gathered via invmap3_*)
float4 invmap4_1<>, //first inverse map into forces4
float4 invmap4_2<>, //second inverse map into forces4
float3 forces4[][], //second force stream (gathered via invmap4_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather3_3(
float strwidth, //stream width of the dihedral forces
......@@ -298,7 +445,6 @@ kernel void kinvmap_gather3_3(
outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
}
//Takes three + four inverse maps
kernel void kinvmap_gather3_4(
float strwidth, //stream width of the dihedral forces
......@@ -407,6 +553,86 @@ kernel void kinvmap_gather5_2(
}
//Takes five + three inverse maps: adds the gathered forces to inforce
//NOTE(review): the "2" in invmap2_*/forces2 does not match the three maps
//used for that stream -- presumably carried over from kinvmap_gather5_2
kernel void kinvmap_gather5_3(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap5_1<>, //first inverse map into forces5
float4 invmap5_2<>, //second inverse map into forces5
float4 invmap5_3<>, //third inverse map into forces5
float4 invmap5_4<>, //fourth inverse map into forces5
float4 invmap5_5<>, //fifth inverse map into forces5
float3 forces5[][], //first force stream (gathered via invmap5_*)
float4 invmap2_1<>, //first inverse map into forces2
float4 invmap2_2<>, //second inverse map into forces2
float4 invmap2_3<>, //third inverse map into forces2
float3 forces2[][], //second force stream (gathered via invmap2_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_5, forces5 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_3, forces2 );
}
//Takes four + three inverse maps: adds the gathered forces to inforce
//NOTE(review): the digits in the parameter names (5/2) do not match the map
//counts of this kernel -- presumably carried over from kinvmap_gather5_2
kernel void kinvmap_gather4_3(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap5_1<>, //first inverse map into forces5
float4 invmap5_2<>, //second inverse map into forces5
float4 invmap5_3<>, //third inverse map into forces5
float4 invmap5_4<>, //fourth inverse map into forces5
float3 forces5[][], //first force stream (gathered via invmap5_*)
float4 invmap2_1<>, //first inverse map into forces2
float4 invmap2_2<>, //second inverse map into forces2
float4 invmap2_3<>, //third inverse map into forces2
float3 forces2[][], //second force stream (gathered via invmap2_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_3, forces2 );
}
//Takes one + two inverse maps: adds the gathered forces to inforce
//NOTE(review): the "5" in invmap5_1/forces5 does not match the single map
//used for that stream -- presumably carried over from kinvmap_gather5_2
kernel void kinvmap_gather1_2(
float strwidth, //stream width, passed through to do_gather_nobranch
float4 invmap5_1<>, //inverse map into forces5
float3 forces5[][], //first force stream (gathered via invmap5_1)
float4 invmap2_1<>, //first inverse map into forces2
float4 invmap2_2<>, //second inverse map into forces2
float3 forces2[][], //second force stream (gathered via invmap2_*)
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
//start from the incoming force and add one gather result per inverse map
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
}
//Takes five + two inverse maps
kernel void kinvmap_gather4_2(
float strwidth, //stream width of the dihedral forces
......
......@@ -91,6 +91,81 @@ void kinvmap_gather2_2 (const float strwidth,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather2_1: two inverse maps for forces3, one for forces4
void kinvmap_gather2_1 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather2_3: two inverse maps for forces3, three for forces4
void kinvmap_gather2_3 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather1_2: one inverse map for forces3, two for forces4
void kinvmap_gather1_2 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather2_4: two inverse maps for forces3, four for forces4
void kinvmap_gather2_4 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream invmap4_4,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather2_5: two inverse maps for forces3, five for forces4
void kinvmap_gather2_5 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream invmap4_3,
::brook::stream invmap4_4,
::brook::stream invmap4_5,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather3_2: three inverse maps for forces3, two for forces4
void kinvmap_gather3_2 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream invmap3_3,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather3_1: three inverse maps for forces3, one for forces4
void kinvmap_gather3_1 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
::brook::stream invmap3_3,
::brook::stream forces3,
::brook::stream invmap4_1,
::brook::stream forces4,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather3_3 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream invmap3_2,
......@@ -154,6 +229,35 @@ void kinvmap_gather5_2 (const float strwidth,
::brook::stream outforce);
// kinvmap_gather5_3: five inverse maps for forces5, three for forces2
void kinvmap_gather5_3 (const float strwidth,
::brook::stream invmap5_1,
::brook::stream invmap5_2,
::brook::stream invmap5_3,
::brook::stream invmap5_4,
::brook::stream invmap5_5,
::brook::stream forces5,
::brook::stream invmap2_1,
::brook::stream invmap2_2,
::brook::stream invmap2_3,
::brook::stream forces2,
::brook::stream inforce,
::brook::stream outforce);
// kinvmap_gather4_3: four inverse maps for forces5, three for forces2
void kinvmap_gather4_3 (const float strwidth,
::brook::stream invmap5_1,
::brook::stream invmap5_2,
::brook::stream invmap5_3,
::brook::stream invmap5_4,
::brook::stream forces5,
::brook::stream invmap2_1,
::brook::stream invmap2_2,
::brook::stream invmap2_3,
::brook::stream forces2,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather4_2 (const float strwidth,
::brook::stream invmap4_1,
::brook::stream invmap4_2,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment