Commit cb130f92 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Mods

parent cc8b4de0
...@@ -1536,3 +1536,276 @@ std::string BrookBonded::getContentsString( int level ) const { ...@@ -1536,3 +1536,276 @@ std::string BrookBonded::getContentsString( int level ) const {
return message.str(); return message.str();
} }
/**
 * Helper for building the inverse maps for torsions, impropers and angles.
 *
 * For each atom, records the interaction indices at which its forces are to
 * be picked up. The n-th occurrence of an atom is stored in component (n % 4)
 * of invmaps[n / 4][atom]; components that are not used keep the value -1.
 *
 * Each interaction record in 'atoms' occupies (niatoms + 1) ints; the atom for
 * slot 'posflag' is read from offset (posflag + 1) of the record. Records whose
 * atom is -1 (merged bondeds) are skipped.
 *
 * @param posflag    atom slot to process, 0 .. niatoms-1: i(0), j(1), k(2) or l(3)
 * @param niatoms    3 for angles, 4 for torsions, impropers
 * @param nints      number of interactions
 * @param natoms     number of atoms
 * @param atoms      gromacs interaction list
 * @param nmaps      maximum number of inverse maps
 * @param counts     output counts of how many places each atom occurs (natoms entries)
 * @param invmaps    output array of nmaps inverse maps, each holding natoms entries
 * @param nimaps     output number of inverse maps actually used
 *
 * @return DefaultReturnValue
 *
 * @throw OpenMMException if an atom index lies outside [0, natoms) or if an atom
 *        occurs in more interactions than the 4*nmaps slots available
 **/
int BrookBonded::gpuCalcInvMap( int posflag, int niatoms, int nints, int natoms,
                                int *atoms, int nmaps, int counts[], float4 *invmaps[],
                                int *nimaps ){

   // ---------------------------------------------------------------------------------------

   static const std::string methodName = "BrookBonded::gpuCalcInvMap";
   static const int PrintOn            = 0;

   // ---------------------------------------------------------------------------------------

   // reset the outputs: no occurrences, every inverse-map component unused (-1)

   memset( counts, 0, sizeof( int )*natoms );
   for( int i = 0; i < nmaps; i++ ){
      for( int j = 0; j < natoms; j++ ){
         invmaps[i][j] = float4( -1.0, -1.0, -1.0, -1.0 );
      }
   }

   // highest map index touched so far; converted to a count before returning

   *nimaps = -1;

   if( PrintOn && getLog() ){
      (void) fprintf( getLog(), "%s: pos=%d ni=%d nints=%d natoms=%d nmaps=<%d>\n", methodName.c_str(), posflag, niatoms, nints, natoms, nmaps );
      (void) fflush( getLog() );
   }

   // each atom can hold at most four entries (x, y, z, w) in each of the nmaps maps

   const int maxEntriesPerAtom = nmaps*4;

   // ranges are only tracked for the diagnostic output at the end

   int atomRange[2]   = { 90000000, -90000000 };
   int mapnumRange[2] = { 90000000, -90000000 };

   // note down the positions where each atom occurs

   for( int i = 0; i < nints; i++ ){

      // this is our atom; the first int of each record is skipped

      const int atom = atoms[ (niatoms + 1) * i + posflag + 1 ];

      // special for merged bondeds

      if( atom == -1 ){
         continue;
      }

      // any other index outside [0, natoms) would read/write outside counts[] and invmaps[][]

      if( atom < 0 || atom >= natoms ){
         std::stringstream message;
         message << methodName << " atom index " << atom << " at interaction " << i << " is outside the valid range [0, " << natoms << ")";
         throw OpenMMException( message.str() );
      }

      if( atom < atomRange[0] ){
         atomRange[0] = atom;
      }
      if( atom > atomRange[1] ){
         atomRange[1] = atom;
      }

      // check that there is still a free slot: the comparison has to be >=, since an atom
      // that already holds 4*nmaps entries would otherwise be written to invmaps[nmaps],
      // one past the last map

      if( counts[atom] >= maxEntriesPerAtom ){
         if( PrintOn && getLog() ){
            (void) fprintf( getLog(), "%s Atom %d has too many proper dihedrals(%d, max %d)\n",
                            methodName.c_str(), atom, counts[atom], maxEntriesPerAtom );
            (void) fflush( getLog() );
         }
         std::stringstream message;
         message << methodName << " Atom " << atom << " has too many proper dihedrals (" << counts[atom] << ", max " << maxEntriesPerAtom << ")";
         throw OpenMMException( message.str() );
      }

      // which invmap will this go into

      const int mapnum = counts[atom] / 4;
      if( mapnum > *nimaps ){
         *nimaps = mapnum;
      }

      // which component will it be

      const int mapcomp = counts[atom] % 4;

      // set it; a switch is used because the components of float4 are named members

      switch( mapcomp ){
         case 0: invmaps[mapnum][atom].x = (float) i; break;
         case 1: invmaps[mapnum][atom].y = (float) i; break;
         case 2: invmaps[mapnum][atom].z = (float) i; break;
         case 3: invmaps[mapnum][atom].w = (float) i; break;
         default:
            if( PrintOn && getLog() ){
               (void) fprintf( getLog(), "mapcomp %d invalid -- impossible!\n", mapcomp );
               (void) fflush( getLog() );
            }
            std::stringstream message;
            message << methodName << " mapcomp " << mapcomp << " invalid -- actually impossible!";
            throw OpenMMException( message.str() );
            break;
      }

      counts[atom]++;

      if( mapnum < mapnumRange[0] ){
         mapnumRange[0] = mapnum;
      }
      if( mapnum > mapnumRange[1] ){
         mapnumRange[1] = mapnum;
      }
   }

   // convert the highest map index used into the number of maps used

   (*nimaps)++;

   if( PrintOn && getLog() ){
      (void) fprintf( getLog(), "%s mnmaps=%d Ranges: atom [%d %d] mapnum [%d %d]\n",
                      methodName.c_str(), *nimaps, atomRange[0], atomRange[1], mapnumRange[0], mapnumRange[1] );
      (void) fflush( getLog() );
   }

   return DefaultReturnValue;
}
void BrookBonded::gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[], FILE* logFile ){
int i;
int j;
for( i = 0; i < natoms; i++ ){
fprintf( logFile, "%d %d ", i, counts[i] );
for( j = 0; j < nmaps; j++ ){
fprintf( logFile, "%6.0f %6.0f %6.0f %6.0f", invmap[j][i].x, invmap[j][i].y,
invmap[j][i].z, invmap[j][i].w );
}
fprintf( logFile, "\n");
}
}
/* We are still plagued by kernel call overheads. This is for a big fat
* merged inverse gather kernel:
* Since we have 32 bit floats, we have 23 bits of mantissa or the largest
* integer we can represent is 2^23. So it should be quite safe to add
* 100000 * n to the index where n is the stream in which we should do the
* lookup. This assumes that nints < 100000, preferably nints << 100000
* which should always be true
* */
int BrookBonded::gpuCalcInvMap_merged(
int nints, //number of interactions
int natoms, //number of atoms
int *atoms, //ijkl,ijkl,ijkl...
int nmaps, //maximum number of inverse maps
int counts[], //output counts of how many places each atom occurs
float4 *invmaps[], //output array of nmaps inverse maps
int *nimaps //output max number of inverse maps actually used
){
int i, j;
int atom;
int mapnum, mapcomp;
int pos;
for( i = 0; i < natoms; i++ )
counts[i] = 0;
for( i = 0; i < nmaps; i++ ){
for( j = 0; j < natoms; j++ ){
invmaps[i][j] = float4( -1.0, -1.0, -1.0, -1.0 );
}
}
//This will hold the number of imaps actually used
*nimaps = -1;
//For each atom
for( i = 0; i < nints; i++ ){
for( j = 0; j < 4; j++ ){
atom = atoms[ i * 4 + j ];
if ( atom == -1 ){
//Nothing to be done for this atom, go to next
continue;
}
//Which map
mapnum = counts[ atom ] / 4;
//Make sure we have space
if ( mapnum >= nmaps ){
printf( "Atom %d has too many bondeds(%d, max %d)\n",
atom, counts[atom], nmaps * 4 );
return 0;
}
if ( mapnum > *nimaps ){
*nimaps = mapnum;
}
//Which component
mapcomp = counts[ atom ] % 4;
//Encode target stream and position
pos = 100000 * j + i;
switch ( mapcomp ){
case 0: invmaps[mapnum][atom].x = (float) pos; break;
case 1: invmaps[mapnum][atom].y = (float) pos; break;
case 2: invmaps[mapnum][atom].z = (float) pos; break;
case 3: invmaps[mapnum][atom].w = (float) pos; break;
}
counts[ atom ]++;
}
}
(*nimaps)++;
return 1;
}
/**
 * Repack the inverse-map streams into one contiguous buffer for more
 * efficient access in the merged inverse gather kernel.
 *
 * The entry of map m for atom a ends up at buf[ a + m*natoms ]. Entries of
 * maps an atom does not use (according to counts) are set to -1.
 *
 * @param natoms     number of atoms
 * @param nmaps      number of inverse maps
 * @param counts     per-atom occurrence counts
 * @param invmaps    inverse maps to repack
 * @param buf        output buffer; should be nmaps * natoms large
 *
 * @return 1
 **/
int BrookBonded::gpuRepackInvMap_merged( int natoms, int nmaps, int *counts,
                                         float4 *invmaps[], float4 *buf ){

   // mark the whole buffer as unused

   for( int mapIndex = 0; mapIndex < nmaps; ++mapIndex ){
      for( int atomIndex = 0; atomIndex < natoms; ++atomIndex ){
         buf[ atomIndex + mapIndex*natoms ] = float4( -1.0f, -1.0f, -1.0f, -1.0f );
      }
   }

   // copy over only the maps each atom actually uses

   for( int atomIndex = 0; atomIndex < natoms; ++atomIndex ){

      // number of maps needed for counts[atomIndex] entries at four per map

      const int usedMaps = counts[atomIndex] / 4 + ( ( counts[atomIndex] % 4 ) ? 1 : 0 );

      for( int mapIndex = 0; mapIndex < usedMaps; ++mapIndex ){
         buf[ atomIndex + mapIndex * natoms ] = invmaps[mapIndex][atomIndex];
      }
   }

   return 1;
}
...@@ -421,6 +421,58 @@ class BrookBonded : public BrookCommon { ...@@ -421,6 +421,58 @@ class BrookBonded : public BrookCommon {
int validateInverseMapStreamCount( int index, int count ) const; int validateInverseMapStreamCount( int index, int count ) const;
/*
* Helper function for building inverse maps for
* torsions, impropers and angles.
*
* For each atom, calculates the positions at which its
* forces are to be picked up from and stores the position
* in the appropriate index.
*
* Input: number of interactions, the atom indices, and a flag indicating
* whether we're doing i(0), j(1), k(2) or l(3)
* Output: an array of counts per atom
* arrays of inverse maps
* nimaps - the number of inverse maps actually used.
*
* @param posflag 0 .. niatoms-1
* @param niatoms 3 for angles, 4 for torsions, impropers
* @param nints number of interactions
* @param natoms number of atoms
* @param atoms gromacs interaction list
* @param nmaps maximum number of inverse maps
* @param counts output counts of how many places each atom occurs
* @param invmaps output array of nmaps inverse maps
* @param nimaps output number of inverse maps actually used
*
* @return DefaultReturnValue, unless error in which case an OpenMMException is thrown
*
**/
int gpuCalcInvMap( int posflag, int niatoms, int nints, int natoms,
int *atoms, int nmaps, int counts[], float4 *invmaps[],
int *nimaps );
/*
* Prints the inverse maps to logFile, one line per atom: the atom index,
* its count and the four components of its entry in each of the nmaps maps.
* */
void gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[], FILE* logFile );
/* We are still plagued by kernel call overheads. This is for a big fat
* merged inverse gather kernel:
* Since we have 32 bit floats, integers are represented exactly up to 2^24.
* So it should be quite safe to add 100000 * n to the index, where n is the
* stream in which we should do the lookup.
* This assumes that nints < 100000, preferably nints << 100000
* which should always be true
*
* Returns 1 on success and 0 if an atom needs more than nmaps inverse maps.
* */
int gpuCalcInvMap_merged( int nints, int natoms, int *atoms, int nmaps, int counts[], float4 *invmaps[], int *nimaps );
/* Repacks the invmap streams for more efficient access in the
* merged inverse gather kernel
*
* The entry of map j for atom i is stored at buf[ i + j*natoms ];
* buf should be nmaps * natoms large.
*
* Returns 1.
* */
int gpuRepackInvMap_merged( int natoms, int nmaps, int *counts, float4 *invmaps[], float4 *buf );
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -36,9 +36,8 @@ ...@@ -36,9 +36,8 @@
#include "BrookStreamImpl.h" #include "BrookStreamImpl.h"
#include "BrookCalcGBSAOBCForceFieldKernel.h" #include "BrookCalcGBSAOBCForceFieldKernel.h"
#include "force.h" #include "gpu/kgbsa.h"
#include "kgbsa.h" #include "gpu/kforce.h"
#include "kforce.h"
#include "math.h" #include "math.h"
using namespace OpenMM; using namespace OpenMM;
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "kernels.h" #include "kernels.h"
#include "SimTKUtilities/SimTKOpenMMRealType.h" #include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "BrookGbsa.h" #include "BrookGbsa.h"
namespace OpenMM { namespace OpenMM {
......
...@@ -36,10 +36,9 @@ ...@@ -36,10 +36,9 @@
#include "BrookStreamImpl.h" #include "BrookStreamImpl.h"
#include "BrookCalcStandardMMForceFieldKernel.h" #include "BrookCalcStandardMMForceFieldKernel.h"
#include "kforce.h" #include "gpu/kforce.h"
#include "kinvmap_gather.h" #include "gpu/kinvmap_gather.h"
#include "ReferencePlatform.h" #include "ReferencePlatform.h"
#include "ReferenceFloatStreamImpl.h"
#include "VerletIntegrator.h" #include "VerletIntegrator.h"
#include "StandardMMForceField.h" #include "StandardMMForceField.h"
...@@ -254,7 +253,7 @@ void BrookCalcStandardMMForceFieldKernel::initialize( ...@@ -254,7 +253,7 @@ void BrookCalcStandardMMForceFieldKernel::initialize(
const vector<int> periodicTorsionInd = *periodicTorsionI_it; const vector<int> periodicTorsionInd = *periodicTorsionI_it;
const vector<double> periodicTorsionPrm = *periodicTorsionP_it; const vector<double> periodicTorsionPrm = *periodicTorsionP_it;
_refForceField->setPeriodicTorsionParameters( ii, periodicTorsionInd[0], periodicTorsionInd[1], periodicTorsionInd[2], periodicTorsionInd[3], _refForceField->setPeriodicTorsionParameters( ii, periodicTorsionInd[0], periodicTorsionInd[1], periodicTorsionInd[2], periodicTorsionInd[3],
periodicTorsionPrm[2], periodicTorsionPrm[1], periodicTorsionPrm[0] ); (int) (periodicTorsionPrm[2] + 0.001), periodicTorsionPrm[1], periodicTorsionPrm[0] );
/* /*
printf( "PeriodicTor: [%d %d %d %d] [%.5e %.5e %.5e]\n", periodicTorsionInd[0], periodicTorsionInd[1], periodicTorsionInd[2], periodicTorsionInd[3], printf( "PeriodicTor: [%d %d %d %d] [%.5e %.5e %.5e]\n", periodicTorsionInd[0], periodicTorsionInd[1], periodicTorsionInd[2], periodicTorsionInd[3],
periodicTorsionPrm[2], periodicTorsionPrm[1], periodicTorsionPrm[0] ); fflush( stdout ); periodicTorsionPrm[2], periodicTorsionPrm[1], periodicTorsionPrm[0] ); fflush( stdout );
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "kernels.h" #include "kernels.h"
#include "SimTKUtilities/SimTKOpenMMRealType.h" #include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "BrookBonded.h" #include "BrookBonded.h"
#include "BrookNonBonded.h" #include "BrookNonBonded.h"
#include "StandardMMForceField.h" #include "StandardMMForceField.h"
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include "BrookPlatform.h" #include "BrookPlatform.h"
#include "BrookStreamInternal.h" #include "BrookStreamInternal.h"
#include "SimTKUtilities/SimTKOpenMMRealType.h" #include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
namespace OpenMM { namespace OpenMM {
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#include "BrookKernelFactory.h" #include "BrookKernelFactory.h"
#include "OpenMMException.h" #include "OpenMMException.h"
#include "kernels.h" #include "kernels.h"
#include "SimTKUtilities/SimTKOpenMMRealType.h" #include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
#include <brook/brook.hpp> #include <brook/brook.hpp>
#include <stdlib.h> #include <stdlib.h>
#include <sstream> #include <sstream>
...@@ -83,10 +83,20 @@ BrookPlatform::BrookPlatform( ){ ...@@ -83,10 +83,20 @@ BrookPlatform::BrookPlatform( ){
// get Brook runtime // get Brook runtime
#ifdef WIN32
char* runtime;
size_t numberOfEnv;
_dupenv_s( &runtime, &numberOfEnv, "brt_runtime" );
#else
char* runtime = getenv( "brt_runtime" ); char* runtime = getenv( "brt_runtime" );
#endif
_initializeKernelFactory( ); _initializeKernelFactory( );
_setBrookRuntime( runtime ); _setBrookRuntime( runtime );
#ifdef WIN32
free( runtime );
#endif
} }
/** /**
......
...@@ -32,11 +32,7 @@ ...@@ -32,11 +32,7 @@
#include <sstream> #include <sstream>
#include "BrookRandomNumberGenerator.h" #include "BrookRandomNumberGenerator.h"
#include "OpenMMException.h" #include "OpenMMException.h"
#include "kupdatesd.h" #include "gpu/kupdatesd.h"
// use random number generator
#include "SimTKOpenMMUtilities.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
......
...@@ -35,10 +35,6 @@ ...@@ -35,10 +35,6 @@
#include "OpenMMException.h" #include "OpenMMException.h"
#include "BrookStreamImpl.h" #include "BrookStreamImpl.h"
// use random number generator
#include "SimTKOpenMMUtilities.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
......
...@@ -34,13 +34,13 @@ ...@@ -34,13 +34,13 @@
#include "BrookPlatform.h" #include "BrookPlatform.h"
#include "OpenMMException.h" #include "OpenMMException.h"
#include "BrookStreamImpl.h" #include "BrookStreamImpl.h"
#include "kshakeh.h" #include "gpu/kshakeh.h"
#include "kupdatesd.h" #include "gpu/kupdatesd.h"
#include "kcommon.h" #include "gpu/kcommon.h"
// use random number generator // use random number generator
#include "SimTKOpenMMUtilities.h" #include "../../reference/src/SimTKUtilities/SimTKOpenMMUtilities.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include <brook/brook.hpp> #include <brook/brook.hpp>
#include "SimTKUtilities/SimTKOpenMMRealType.h"
namespace OpenMM { namespace OpenMM {
......
/****************************************************************
* This file is part of the gpu acceleration library for gromacs.
* Author: V. Vishal
* Copyright (C) Pande Group, Stanford, 2006
*****************************************************************/
#include <stdio.h>
#include <brook/brook.hpp>
// #include "typedefs.h"
#include "invmap.h"
/*
* Helper functions for building inverse maps for
* torsions, impropers and angles.
*
* */
/*
 * For each atom, calculates the positions at which its
 * forces are to be picked up from and stores the position
 * in the appropriate index: the n-th occurrence of an atom goes
 * into component (n % 4) of invmaps[n / 4][atom]. Unused
 * components keep the value -1.
 *
 * Each interaction record in 'atoms' occupies (niatoms + 1) ints; the
 * atom for slot 'posflag' is read from offset (posflag + 1) of the record.
 *
 * Input:  number of interactions, the atom indices, and a flag indicating
 *         whether we're doing i(0), j(1), k(2) or l(3)
 * Output: an array of counts per atom
 *         arrays of inversemaps
 *         nimaps - the number of invmaps actually used.
 *
 * Returns 1 on success. Returns 0, after printing a message, if an atom
 * index lies outside [0, natoms) or an atom occurs in more interactions
 * than the 4 * nmaps slots available.
 *
 * */
int
gpuCalcInvMap(
        int posflag,       //0-niatoms-1
        int niatoms,       //3 for angles, 4 for torsions, impropers
        int nints,         //number of interactions
        int natoms,        //number of atoms
        int *atoms,        //gromacs interaction list
        int nmaps,         //maximum number of inverse maps
        int counts[],      //output counts of how many places each atom occurs
        float4 *invmaps[], //output array of nmaps inverse maps
        int *nimaps        //output max number of inverse maps actually used
        )
{
    int i, j;
    int atom;
    int mapnum, mapcomp;

    //Each atom has four slots (x, y, z, w) in each of the nmaps maps
    const int maxEntriesPerAtom = nmaps * 4;

    for ( i = 0; i < natoms; i++ )
        counts[i] = 0;

    for ( i = 0; i < nmaps; i++ ) {
        for ( j = 0; j < natoms; j++ ) {
            invmaps[i][j] = float4( -1.0, -1.0, -1.0, -1.0 );
        }
    }

    //This will hold the number of imaps actually used
    *nimaps = -1;

    printf( "gpuCalcInvMap: posflag=%d niatoms=%d nints=%d natoms=%d nmaps=%d\n",
            posflag, niatoms, nints, natoms, nmaps );

    //Now note down the positions where each atom occurs
    for ( i = 0; i < nints; i++ ) {

        //This is our atom
        atom = atoms[ (niatoms + 1) * i + posflag + 1 ];

        //Special for merged bondeds
        if ( atom == -1 ) {
            continue;
        }

        //Any other index outside [0, natoms) would access counts[] and
        //invmaps[][] out of bounds
        if ( atom < 0 || atom >= natoms ) {
            printf( "Atom index %d at interaction %d is outside the valid range [0, %d)\n",
                    atom, i, natoms );
            return 0;
        }

        //Check to make sure we're inside the limits. This has to be >=:
        //an atom that already holds 4 * nmaps entries would otherwise be
        //written to invmaps[nmaps], one past the last map.
        if ( counts[atom] >= maxEntriesPerAtom ) {
            printf( "Atom %d has too many proper dihedrals(%d, max %d)\n",
                    atom, counts[atom], maxEntriesPerAtom );
            return 0;
        }

        //Which invmap will this go into
        mapnum = counts[atom] / 4;
        if ( mapnum > *nimaps )
            *nimaps = mapnum;

        //Which component will it be
        mapcomp = counts[atom] % 4;

        //Set it
        //A switch is used because the float4 components are named members
        switch (mapcomp) {
            case 0: invmaps[mapnum][atom].x = (float) i; break;
            case 1: invmaps[mapnum][atom].y = (float) i; break;
            case 2: invmaps[mapnum][atom].z = (float) i; break;
            case 3: invmaps[mapnum][atom].w = (float) i; break;
        }

        counts[atom]++;

        printf( "Atom %d count=%d max %d mapcomp=%d val=%d mapnum=%d\n", atom, counts[atom],
                maxEntriesPerAtom, mapcomp, i, mapnum );
    }

    //Convert the highest map index used into the number of maps used
    (*nimaps)++;

    return 1;
}
void
gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[], FILE* logFile )
{
int i;
int j;
for ( i = 0; i < natoms; i++ ) {
fprintf( logFile, "%d %d ", i, counts[i] );
for ( j = 0; j < nmaps; j++ ) {
fprintf( logFile, "%6.0f %6.0f %6.0f %6.0f", invmap[j][i].x, invmap[j][i].y,
invmap[j][i].z, invmap[j][i].w );
}
fprintf( logFile, "\n");
}
}
/* We are still plagued by kernel call overheads. This is for a big fat
* merged inverse gather kernel:
* Since we have 32 bit floats, we have 23 bits of mantissa or the largest
* integer we can represent is 2^23. So it should be quite safe to add
* 100000 * n to the index where n is the stream in which we should do the
* lookup. This assumes that nints < 100000, preferably nints << 100000
* which should always be true
* */
int
gpuCalcInvMap_merged(
int nints, //number of interactions
int natoms, //number of atoms
int *atoms, //ijkl,ijkl,ijkl...
int nmaps, //maximum number of inverse maps
int counts[], //output counts of how many places each atom occurs
float4 *invmaps[], //output array of nmaps inverse maps
int *nimaps //output max number of inverse maps actually used
)
{
int i, j;
int atom;
int mapnum, mapcomp;
int pos;
for ( i = 0; i < natoms; i++ )
counts[i] = 0;
for ( i = 0; i < nmaps; i++ ) {
for ( j = 0; j < natoms; j++ ) {
invmaps[i][j] = float4( -1.0, -1.0, -1.0, -1.0 );
}
}
//This will hold the number of imaps actually used
*nimaps = -1;
//For each atom
for ( i = 0; i < nints; i++ ) {
for ( j = 0; j < 4; j++ ) {
atom = atoms[ i * 4 + j ];
if ( atom == -1 ) {
//Nothing to be done for this atom, go to next
continue;
}
//Which map
mapnum = counts[ atom ] / 4;
//Make sure we have space
if ( mapnum >= nmaps ) {
printf( "Atom %d has too many bondeds(%d, max %d)\n",
atom, counts[atom], nmaps * 4 );
return 0;
}
if ( mapnum > *nimaps ) {
*nimaps = mapnum;
}
//Which component
mapcomp = counts[ atom ] % 4;
//Encode target stream and position
pos = 100000 * j + i;
switch ( mapcomp ) {
case 0: invmaps[mapnum][atom].x = (float) pos; break;
case 1: invmaps[mapnum][atom].y = (float) pos; break;
case 2: invmaps[mapnum][atom].z = (float) pos; break;
case 3: invmaps[mapnum][atom].w = (float) pos; break;
}
counts[ atom ]++;
}
}
(*nimaps)++;
return 1;
}
/* Repacks the invmap streams into a single buffer for more efficient
 * access in the merged inverse gather kernel.
 *
 * The entry of map m for atom a lands at buf[ a + m * natoms ]; maps an
 * atom does not use (going by counts) are left at -1.
 *
 * buf should be nmaps * natoms large. Always returns 1.
 * */
int
gpuRepackInvMap_merged( int natoms, int nmaps, int *counts,
                        float4 *invmaps[], float4 *buf )
{
    int atom, map;

    //Mark everything unused first
    for ( map = 0; map < nmaps; map++ ) {
        for ( atom = 0; atom < natoms; atom++ ) {
            buf[ atom + map*natoms ] = float4( -1.0f, -1.0f, -1.0f, -1.0f );
        }
    }

    //Then copy the maps each atom really uses
    for ( atom = 0; atom < natoms; atom++ ) {

        //Four entries fit in a map; a partial map counts as one
        int used = counts[atom] / 4;
        if ( counts[atom] % 4 != 0 ) {
            used += 1;
        }

        for ( map = 0; map < used; map++ ) {
            buf[ atom + map * natoms ] = invmaps[map][atom];
        }
    }

    return 1;
}
This diff is collapsed.
/****************************************************************
* This file is part of the gpu acceleration library for gromacs.
* Author: V. Vishal
* Copyright (C) Pande Group, Stanford, 2006
*****************************************************************/
//Harmonic angles kernel
//
//For each angle i-j-k the kernel computes the angle theta between the
//vectors rij and rkj and from it the forces on the three atoms.
//
//Inputs:
//  xstrwidth - width used to turn a linear atom index into a 2D index into posq
//  atoms     - stream of triplets i, j, k
//  parms     - float2( theta0, kA )
//  posq      - 2D gather array; only its xyz components are read here
//Output is three streams of forces fi, fj, fk
//
//Again, this is kept simple for now, can be optimized
//later as necessary
//
//NOTE(review): dx2 is assigned but never read, and dvdt, idx, xi, xj, xk
//are declared but never used.
kernel void kangles_harmonic(
float xstrwidth,
float3 atoms<>,
float2 parms<>,
float4 posq[][],
out float3 fi<>,
out float3 fj<>,
out float3 fk<>
) {
float theta;
float dx, dx2, fs;
float st, dvdt, cik, sth, nrkj2, nrij2, cii, ckk, costheta, sintheta;
float rij2, rkj2;
float2 idx;
float2 ai, aj, ak;
float3 xi, xj, xk, rij, rkj;
//Split each linear atom index into a (column, row) pair:
//row = floor( index / width ), column = index - row * width
ai.y = floor( atoms.x / xstrwidth );
ai.x = atoms.x - ai.y * xstrwidth;
aj.y = floor( atoms.y / xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
ak.y = floor( atoms.z / xstrwidth );
ak.x = atoms.z - ak.y * xstrwidth;
//Vectors from atom j to atoms i and k
rij = posq[ ai ].xyz - posq[ aj ].xyz; //3
rkj = posq[ ak ].xyz - posq[ aj ].xyz; //3
rij2 = dot( rij, rij ); //5
rkj2 = dot( rkj, rkj ); //5
//Cosine of the angle, clamped to [-1, 1] before it is passed to acos
costheta = dot( rij, rkj ) / sqrt( rij2 * rkj2 ); //8
costheta = clamp( costheta, -1.0, 1.0 );
theta = acos( costheta ); //1 flop, ouch
sintheta = sqrt( 1 - costheta * costheta ); //3
//Deviation from theta0
dx = theta - parms.x; //1
dx2 = dx * dx; //1
/*scalar force = dv/dtheta*/
fs = -parms.y * dx; //1
//sintheta is zero when costheta is +/-1, so the quotient is clamped afterwards
st = fs / sintheta; //1
st = clamp( st, -1000000.0, 1000000.0 ); //Does this work on the gpu for st=inf?
sth = st * costheta; //1
//Same values as rkj2 and rij2 above, computed a second time
nrkj2 = dot( rkj, rkj ); //5
nrij2 = dot( rij, rij ); //5
cik = st * rsqrt( nrkj2 * nrij2 ); //3
cii = sth / nrij2; //1
ckk = sth / nrkj2; //1
fi = -( cik * rkj - cii * rij ); //7
fk = -( cik * rij - ckk * rkj ); //7
//fj is chosen so that the three forces sum to zero
fj = -fi - fk; //3
//Total flops: 64
}
#ifndef __KCOMMON_H__
#define __KCOMMON_H__

// Host-side entry points for the common stream kernels.

// Copies the xyz components of each element of instr into outstr.
void kgetxyz (::brook::stream instr,
              ::brook::stream outstr);

// Sets every element of a float3 stream to zero.
void kzerof3 (::brook::stream outstr);

// Sets every element of a float4 stream to zero.
// (This declaration previously appeared twice; the redundant copy was removed.)
void kzerof4 (::brook::stream outstr);

// Sets every element of a float4 stream to val.
void ksetf4 (const float4 val, ::brook::stream outstr);

// NOTE(review): presumably adds instr to outstr element-wise -- confirm against the kernel source.
void kadd3( ::brook::stream instr, ::brook::stream outstr );

// NOTE(review): presumably copies instr into outstr -- confirm against the kernel source.
void ksetStr3( ::brook::stream instr, ::brook::stream outstr );

#endif // __KCOMMON_H__
/****************************************************************
* This file is part of the gpu acceleration library for gromacs.
* Author: V. Vishal
* Copyright (C) Pande Group, Stanford, 2006
*****************************************************************/
//Kernel to set the xyz components of a float4 stream
//Used for changing the coordinates without changing
//the charges in strPosQ
//  instr  - new xyz values
//  before - stream whose w component is carried over unchanged
//  after  - result: xyz taken from instr, w taken from before
kernel void ksetxyz( float3 instr<>, float4 before<>, out float4 after<> ) {
after.xyz = instr;
after.w = before.w;
}
//Extracts the xyz components of a float4 stream into a float3 stream;
//the w component is dropped
kernel void kgetxyz( float4 instr<>, out float3 outstr<> ) {
outstr = instr.xyz;
}
//Zeroes out a float3 stream: every element is set to ( 0, 0, 0 )
kernel void kzerof3( out float3 outstr<> ) {
outstr = float3( 0.0, 0.0, 0.0 );
}
//Zeroes out a float4 stream: every element is set to ( 0, 0, 0, 0 )
kernel void kzerof4( out float4 outstr<> ) {
outstr = float4( 0.0, 0.0, 0.0, 0.0 );
}
//Fills a float4 stream: every element is set to val
kernel void ksetf4( float4 val, out float4 outstr<> ) {
outstr = val;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment