Mods

cb130f92 · Mark Friedrichs · cc8b4de0 · cb130f92 · cb130f92 · cb130f92
Commit cb130f92 authored Sep 25, 2008 by Mark Friedrichs
20 changed files
--- a/platforms/brook/src/BrookBonded.cpp
+++ b/platforms/brook/src/BrookBonded.cpp
@@ -1536,3 +1536,276 @@ std::string BrookBonded::getContentsString( int level ) const {
   return message.str();
 }

+/*
+ * Helper function for building inverse maps for
+ * torsions, impropers and angles.
+ *
+ * For each atom, records the indices of the interactions in which the atom
+ * occupies position 'posflag', i.e. the positions from which its forces
+ * are to be picked up. The k-th occurrence of an atom (k counted from 0) is
+ * stored in component k%4 (x, y, z, w) of invmaps[k/4][atom]; components
+ * that are never written are left at -1.
+ *
+ * @param posflag       0-niatoms-1; selects i(0), j(1), k(2) or l(3)
+ * @param niatoms       3 for angles, 4 for torsions, impropers
+ * @param nints         number of interactions
+ * @param natoms        number of atoms
+ * @param *atoms        gromacs interaction list; each interaction occupies
+ *                      (niatoms + 1) entries and the atom indices follow the
+ *                      first entry; an atom index of -1 is skipped
+ * @param nmaps         maximum number of inverse maps
+ * @param   counts[]    output counts of how many places each atom occurs
+ * @param *invmaps[]    output array of nmaps inverse maps
+ * @param *nimaps       output number of inverse maps actually used
+ *
+ * @return DefaultReturnValue
+ *
+ * @throw OpenMMException if an atom occurs in more than 4*nmaps interactions
+ *
+ **/
+
+int BrookBonded::gpuCalcInvMap( int posflag, int niatoms, int nints, int natoms,
+                                int *atoms, int nmaps, int counts[], float4 *invmaps[],
+                                int *nimaps ){
+
+// ---------------------------------------------------------------------------------------
+
+   static const std::string methodName      = "BrookBonded::gpuCalcInvMap";
+   static const int PrintOn                 = 0;
+
+// ---------------------------------------------------------------------------------------
+
+   // reset the occurrence counts and mark every inverse-map slot as unused (-1)
+
+   memset( counts, 0, sizeof( int )*natoms );
+
+   for( int i = 0; i < nmaps; i++ ){
+      for( int j = 0; j < natoms; j++ ){
+         invmaps[i][j] = float4( -1.0, -1.0, -1.0, -1.0 );
+      }
+   }
+   
+   // This holds the highest map index used so far; it is converted
+   // into the number of maps used just before returning
+
+   *nimaps = -1;
+
+   if( PrintOn && getLog() ){
+      (void) fprintf( getLog(), "%s: pos=%d ni=%d nints=%d natoms=%d nmaps=<%d>\n", methodName.c_str(), posflag, niatoms, nints, natoms, nmaps ); 
+      (void) fflush( getLog() );
+   }
+
+   // diagnostics only: observed ranges of atom indices and map numbers
+
+   int atomRange[2]   = { 90000000, -90000000 };
+   int mapnumRange[2] = { 90000000, -90000000 };
+
+   // Now note down the positions where each atom occurs
+
+   for( int i = 0; i < nints; i++ ){
+
+      // This is our atom
+
+      const int atom = atoms[ (niatoms + 1) * i + posflag + 1 ];
+
+      // Special for merged bondeds
+
+      if ( atom == -1 ){
+         continue;
+      }
+
+      if( atom < atomRange[0] ){
+         atomRange[0] = atom;
+      }
+      if( atom > atomRange[1] ){
+         atomRange[1] = atom;
+      }
+
+      // Check to make sure we're inside the limits: the slots for this atom are
+      // numbered 0 .. 4*nmaps-1, so a count of 4*nmaps already means there is no
+      // room left (using '>' here would allow a write to invmaps[nmaps])
+
+      if ( counts[atom] >= nmaps * 4 ){
+         if( PrintOn && getLog() ){
+            (void) fprintf( getLog(), "%s Atom %d has too many proper dihedrals(%d, max %d)\n",
+                            methodName.c_str(), atom, counts[atom], nmaps*4 );
+            (void) fflush( getLog() );
+         }
+         std::stringstream message;
+         message << methodName << " Atom " << atom << " has too many proper dihedrals; valid range:(" << counts[atom] << ", " << nmaps*4 << ")";
+         throw OpenMMException( message.str() );
+      }
+      
+      // Which invmap will this go into
+
+      const int mapnum = counts[atom] / 4;
+
+      if ( mapnum > *nimaps ){
+         *nimaps = mapnum;
+      }
+
+      // Which component will it be
+
+      const int mapcomp = counts[atom] % 4;
+
+      // Set it
+      // This is silly, but otherwise I have to declare it as float*
+      // and things get even more confusing. :)
+
+      switch (mapcomp){
+         case 0: invmaps[mapnum][atom].x = (float) i; break;
+         case 1: invmaps[mapnum][atom].y = (float) i; break;
+         case 2: invmaps[mapnum][atom].z = (float) i; break;
+         case 3: invmaps[mapnum][atom].w = (float) i; break;
+         default:
+            if( PrintOn && getLog() ){
+               (void) fprintf( getLog(), "mapcomp %d invalid -- impossible!\n", mapcomp );
+               (void) fflush( getLog() );
+            }
+            std::stringstream message;
+            message << methodName << " mapcomp " << mapcomp << " invalid -- actually impossible!";
+            throw OpenMMException( message.str() );
+      }
+      
+      counts[atom]++;
+
+      if( mapnum < mapnumRange[0] ){
+         mapnumRange[0] = mapnum;
+      }
+      if( mapnum > mapnumRange[1] ){
+         mapnumRange[1] = mapnum;
+      }
+   }
+
+   // highest index used -> number of maps used
+
+   (*nimaps)++;
+
+   if( PrintOn && getLog() ){
+      (void) fprintf( getLog(), "%s mnmaps=%d Ranges: atom [%d %d] mapnum [%d %d]\n",
+                      methodName.c_str(), *nimaps, atomRange[0], atomRange[1], mapnumRange[0], mapnumRange[1] );
+      (void) fflush( getLog() );
+   }
+
+   return DefaultReturnValue;
+}
+
+
+void BrookBonded::gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[], FILE* logFile ){
+
+   // One output line per atom: the atom index, its occurrence count, and then
+   // the four components of that atom's entry in each of the nmaps inverse maps
+
+   for( int atomIndex = 0; atomIndex < natoms; ++atomIndex ){
+
+      fprintf( logFile, "%d %d ", atomIndex, counts[atomIndex] );
+
+      for( int mapIndex = 0; mapIndex < nmaps; ++mapIndex ){
+         const float4& entry = invmap[mapIndex][atomIndex];
+         fprintf( logFile, "%6.0f %6.0f %6.0f %6.0f", entry.x, entry.y, entry.z, entry.w );
+      }
+
+      fprintf( logFile, "\n");
+   }
+}
+
+/* We are still plagued by kernel call overheads. This is for a big fat
+ * merged inverse gather kernel.
+ *
+ * Every occurrence of an atom is stored as a single float that encodes both
+ * the stream to look in and the position within it:
+ *    100000 * n + i
+ * where n (0-3) is the slot of the atom within the interaction and i is the
+ * interaction index. A 32 bit float has 23 explicit bits of mantissa, so
+ * integers of this size are represented exactly. This assumes that
+ * nints < 100000, preferably nints << 100000, which should always be true.
+ *
+ * Returns 1 on success; prints a message and returns 0 as soon as an atom
+ * needs more than nmaps inverse maps (the outputs are then only partly filled).
+ * */
+int BrookBonded::gpuCalcInvMap_merged( 
+      int nints,         //number of interactions
+      int natoms,        //number of atoms
+      int *atoms,        //ijkl,ijkl,ijkl...
+      int nmaps,         //maximum number of inverse maps
+      int counts[],      //output counts of how many places each atom occurs
+      float4 *invmaps[], //output array of nmaps inverse maps
+      int *nimaps        //output max number of inverse maps actually used
+      ){
+
+   // Start from a clean slate: no occurrences, every slot unused (-1)
+   for( int atomIndex = 0; atomIndex < natoms; ++atomIndex ){
+      counts[atomIndex] = 0;
+   }
+
+   for( int mapIndex = 0; mapIndex < nmaps; ++mapIndex ){
+      for( int atomIndex = 0; atomIndex < natoms; ++atomIndex ){
+         invmaps[mapIndex][atomIndex] = float4( -1.0, -1.0, -1.0, -1.0 );
+      }
+   }
+
+   // Highest map index touched so far; turned into a count before returning
+   *nimaps = -1;
+
+   for( int interaction = 0; interaction < nints; ++interaction ){
+      for( int slot = 0; slot < 4; ++slot ){
+
+         const int atom = atoms[ interaction * 4 + slot ];
+
+         // -1 marks an empty slot: nothing to record
+         if ( atom == -1 ){
+            continue;
+         }
+
+         const int occurrence = counts[ atom ];
+         const int mapnum     = occurrence / 4;
+
+         // Make sure we have space
+         if ( mapnum >= nmaps ){
+            printf( "Atom %d has too many bondeds(%d, max %d)\n",
+                    atom, occurrence, nmaps * 4 );
+            return 0;
+         }
+
+         if ( *nimaps < mapnum ){
+            *nimaps = mapnum;
+         }
+
+         // Encode target stream and position
+         const int pos = 100000 * slot + interaction;
+
+         // The component within the float4 is chosen by the occurrence number
+         float4& target = invmaps[mapnum][atom];
+         switch ( occurrence % 4 ){
+            case 0: target.x = (float) pos; break;
+            case 1: target.y = (float) pos; break;
+            case 2: target.z = (float) pos; break;
+            case 3: target.w = (float) pos; break;
+         }
+
+         counts[ atom ] = occurrence + 1;
+      }
+   }
+
+   (*nimaps)++;
+   return 1;
+}
+
+/* Repacks the invmap streams into a single buffer for more efficient
+ * access in the merged inverse gather kernel.
+ *
+ * The entry of map j for atom i ends up at buf[ i + j * natoms ]. Entries
+ * beyond the maps an atom actually uses (ceil( counts[i] / 4 )) are set
+ * to (-1, -1, -1, -1).
+ *
+ * buf should be nmaps * natoms large.
+ *
+ * Always returns 1.
+ * */
+int BrookBonded::gpuRepackInvMap_merged( int natoms, int nmaps, int *counts, 
+                                         float4 *invmaps[], float4 *buf ){
+
+   // Pass 1: mark the whole nmaps x natoms area as unused
+   for( int mapIndex = 0; mapIndex < nmaps; ++mapIndex ){
+      for( int atomIndex = 0; atomIndex < natoms; ++atomIndex ){
+         buf[ atomIndex + mapIndex * natoms ] = float4( -1.0f, -1.0f, -1.0f, -1.0f );
+      }
+   }
+
+   // Pass 2: copy, per atom, only the maps that atom actually occupies
+   for( int atomIndex = 0; atomIndex < natoms; ++atomIndex ){
+
+      // four occurrences fit in one map; a partial map still counts
+      const int usedMaps = counts[atomIndex] / 4 + ( ( counts[atomIndex] % 4 ) ? 1 : 0 );
+
+      for( int mapIndex = 0; mapIndex < usedMaps; ++mapIndex ){
+         buf[ atomIndex + mapIndex * natoms ] = invmaps[mapIndex][atomIndex];
+      }
+   }
+
+   return 1;
+}
--- a/platforms/brook/src/BrookBonded.h
+++ b/platforms/brook/src/BrookBonded.h
@@ -421,6 +421,58 @@ class BrookBonded : public BrookCommon {

      int validateInverseMapStreamCount( int index, int count ) const;
      
+      /*
+       * Helper functions for building inverse maps for 
+       * torsions, impropers and angles.
+       * 
+       * For each atom, calculates the positions from which its
+       * forces are to be picked up and stores each position
+       * in the appropriate index.
+       *
+       * Input: number of dihedrals, the atom indices, and a flag indicating
+       *        whether we're doing i(0), j(1), k(2) or l(3)
+       * Output: an array of counts per atom
+       *         arrays of inversemaps
+       *         nimaps - the number of invmaps actually used.
+       *
+       * @param posflag       0-niatoms-1
+       * @param niatoms       3 for angles, 4 for torsions, impropers
+       * @param nints         number of interactions
+       * @param natoms        number of atoms
+       * @param *atoms        gromacs interaction list
+       * @param nmaps         maximum number of inverse maps
+       * @param   counts[]    output counts of how many places each atom occurs
+       * @param *invmaps[]    output array of nmaps inverse maps
+       * @param *nimaps,      output max number of inverse maps actually used
+       *
+       * @return DefaultReturnValue, unless error in which case exits w/ OpenMM exception
+       *
+       **/
+      
+      int gpuCalcInvMap( int posflag, int niatoms, int nints, int natoms,
+                          int *atoms, int nmaps, int counts[], float4 *invmaps[],
+                          int *nimaps );
+      
+      void gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[], FILE* logFile );
+      
+      /* We are still plagued by kernel call overheads. This is for a big fat
+       * merged inverse gather kernel:
+       * Since we have 32 bit floats with 23 explicit bits of mantissa, every
+       * integer up to at least 2^23 is represented exactly. So it should be
+       * quite safe to add 100000 * n to the index, where n is the stream in
+       * which we should do the lookup. This assumes that nints < 100000,
+       * preferably nints << 100000, which should always be true.
+       * */
+      int gpuCalcInvMap_merged( int nints, int natoms, int *atoms, int nmaps, int counts[], float4 *invmaps[], int *nimaps );
+      
+      /* Repacks the invmap streams for more efficient access in the
+       * merged inverse gather kernel
+       *
+       * buf should be nmaps * natoms large (nmaps as passed to this function).
+       * */
+      int gpuRepackInvMap_merged( int natoms, int nmaps, int *counts, float4 *invmaps[], float4 *buf );
+      
+        
 };

 } // namespace OpenMM

--- a/platforms/brook/src/BrookCalcGBSAOBCForceFieldKernel.cpp
+++ b/platforms/brook/src/BrookCalcGBSAOBCForceFieldKernel.cpp
@@ -36,9 +36,8 @@

 #include "BrookStreamImpl.h"
 #include "BrookCalcGBSAOBCForceFieldKernel.h"
-#include "force.h"
-#include "kgbsa.h"
-#include "kforce.h"
+#include "gpu/kgbsa.h"
+#include "gpu/kforce.h"
 #include "math.h"

 using namespace OpenMM;

--- a/platforms/brook/src/BrookCalcGBSAOBCForceFieldKernel.h
+++ b/platforms/brook/src/BrookCalcGBSAOBCForceFieldKernel.h
@@ -33,7 +33,7 @@
 * -------------------------------------------------------------------------- */

 #include "kernels.h"
-#include "SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
 #include "BrookGbsa.h"

 namespace OpenMM {

--- a/platforms/brook/src/BrookCalcStandardMMForceFieldKernel.cpp
+++ b/platforms/brook/src/BrookCalcStandardMMForceFieldKernel.cpp
@@ -36,10 +36,9 @@

 #include "BrookStreamImpl.h"
 #include "BrookCalcStandardMMForceFieldKernel.h"
-#include "kforce.h"
-#include "kinvmap_gather.h"
+#include "gpu/kforce.h"
+#include "gpu/kinvmap_gather.h"
 #include "ReferencePlatform.h"
-#include "ReferenceFloatStreamImpl.h"
 #include "VerletIntegrator.h"
 #include "StandardMMForceField.h"

@@ -254,7 +253,7 @@ void BrookCalcStandardMMForceFieldKernel::initialize(
      const vector<int>    periodicTorsionInd = *periodicTorsionI_it;
      const vector<double> periodicTorsionPrm = *periodicTorsionP_it;
      _refForceField->setPeriodicTorsionParameters( ii, periodicTorsionInd[0], periodicTorsionInd[1], periodicTorsionInd[2], periodicTorsionInd[3],
-                                                    periodicTorsionPrm[2], periodicTorsionPrm[1], periodicTorsionPrm[0] );
+                                                    (int) (periodicTorsionPrm[2] + 0.001), periodicTorsionPrm[1], periodicTorsionPrm[0] );
 /*
 printf( "PeriodicTor: [%d %d %d %d] [%.5e %.5e %.5e]\n", periodicTorsionInd[0], periodicTorsionInd[1], periodicTorsionInd[2], periodicTorsionInd[3],
                                                         periodicTorsionPrm[2], periodicTorsionPrm[1], periodicTorsionPrm[0] ); fflush( stdout );

--- a/platforms/brook/src/BrookCalcStandardMMForceFieldKernel.h
+++ b/platforms/brook/src/BrookCalcStandardMMForceFieldKernel.h
@@ -33,7 +33,7 @@
 * -------------------------------------------------------------------------- */

 #include "kernels.h"
-#include "SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
 #include "BrookBonded.h"
 #include "BrookNonBonded.h"
 #include "StandardMMForceField.h"

--- a/platforms/brook/src/BrookFloatStreamInternal.h
+++ b/platforms/brook/src/BrookFloatStreamInternal.h
@@ -34,7 +34,7 @@

 #include "BrookPlatform.h"
 #include "BrookStreamInternal.h"
-#include "SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"

 namespace OpenMM {


--- a/platforms/brook/src/BrookPlatform.cpp
+++ b/platforms/brook/src/BrookPlatform.cpp
@@ -33,7 +33,7 @@
 #include "BrookKernelFactory.h"
 #include "OpenMMException.h"
 #include "kernels.h"
-#include "SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
 #include <brook/brook.hpp>
 #include <stdlib.h>
 #include <sstream>
@@ -83,10 +83,20 @@ BrookPlatform::BrookPlatform( ){

   // get Brook runtime

+#ifdef WIN32
+   char* runtime;
+   size_t numberOfEnv;
+    _dupenv_s( &runtime, &numberOfEnv, "brt_runtime" );
+#else
   char* runtime     = getenv( "brt_runtime" );
+#endif

   _initializeKernelFactory( );
   _setBrookRuntime( runtime );
+
+#ifdef WIN32
+   free( runtime );
+#endif
 }

 /** 

--- a/platforms/brook/src/BrookRandomNumberGenerator.cpp
+++ b/platforms/brook/src/BrookRandomNumberGenerator.cpp
@@ -32,11 +32,7 @@
 #include <sstream>
 #include "BrookRandomNumberGenerator.h"
 #include "OpenMMException.h"
-#include "kupdatesd.h"
-
-// use random number generator
-
-#include "SimTKOpenMMUtilities.h"
+#include "gpu/kupdatesd.h"

 using namespace OpenMM;
 using namespace std;

--- a/platforms/brook/src/BrookShakeAlgorithm.cpp
+++ b/platforms/brook/src/BrookShakeAlgorithm.cpp
@@ -35,10 +35,6 @@
 #include "OpenMMException.h"
 #include "BrookStreamImpl.h"

-// use random number generator
-
-#include "SimTKOpenMMUtilities.h"
-
 using namespace OpenMM;
 using namespace std;


--- a/platforms/brook/src/BrookStochasticDynamics.cpp
+++ b/platforms/brook/src/BrookStochasticDynamics.cpp
@@ -34,13 +34,13 @@
 #include "BrookPlatform.h"
 #include "OpenMMException.h"
 #include "BrookStreamImpl.h"
-#include "kshakeh.h"
-#include "kupdatesd.h"
-#include "kcommon.h"
+#include "gpu/kshakeh.h"
+#include "gpu/kupdatesd.h"
+#include "gpu/kcommon.h"

 // use random number generator

-#include "SimTKOpenMMUtilities.h"
+#include "../../reference/src/SimTKUtilities/SimTKOpenMMUtilities.h"

 using namespace OpenMM;
 using namespace std;

--- a/platforms/brook/src/BrookStreamInternal.h
+++ b/platforms/brook/src/BrookStreamInternal.h
@@ -33,7 +33,6 @@
 * -------------------------------------------------------------------------- */

 #include <brook/brook.hpp>
-#include "SimTKUtilities/SimTKOpenMMRealType.h"

 namespace OpenMM {


--- a/platforms/brook/src/gpu/Release/gmxgpu.lib
+++ b/platforms/brook/src/gpu/Release/gmxgpu.lib
--- a/platforms/brook/src/gpu/gmxgpu.lib
+++ b/platforms/brook/src/gpu/gmxgpu.lib
--- a/platforms/brook/src/gpu/gmxgpu_d.lib
+++ b/platforms/brook/src/gpu/gmxgpu_d.lib
--- a/platforms/brook/src/gpu/invmap.cpp
+++ b/platforms/brook/src/gpu/invmap.cpp
-
-/****************************************************************
-* This file is part of the gpu acceleration library for gromacs.
-* Author: V. Vishal
-* Copyright (C) Pande Group, Stanford, 2006
-*****************************************************************/
-#include <stdio.h>
-#include <brook/brook.hpp>
-// #include "typedefs.h"
-#include "invmap.h"
-
-
-/*
- * Helper functions for building inverse maps for 
- * torsions, impropers and angles.
- * 
- * */
-
-/*
- * For each atom, calculates the positions at which it's
- * forces are to be picked up from and stores the position
- * in the appropriate index.
- *
- * Input: number of dihedrals, the atom indices, and a flag indicating
- *        whether we're doing i(0), j(1), k(2) or l(3)
- * Output: an array of counts per atom
- *         arrays of inversemaps
- *         nimaps - the number of invmaps actually used.
- * 
- * */
-int
-gpuCalcInvMap( 
-		int posflag,  //0-niatoms-1
-		int niatoms,  //3 for angles, 4 for torsions, impropers
-		int nints,    //number of interactions
-		int natoms,   //number of atoms
-		int *atoms, //gromacs interaction list
-		int nmaps,      //maximum number of inverse maps
-        int counts[],   //output counts of how many places each atom occurs
-		float4 *invmaps[], //output array of nmaps inverse maps
-		int *nimaps        //output max number of inverse maps actually used
-		)
-{
-	int i, j;
-	int atom;
-	int mapnum, mapcomp;
-	
-	for ( i = 0; i < natoms; i++ )
-		counts[i] = 0;
-	
-	for ( i = 0; i < nmaps; i++ ) {
-		for ( j = 0; j < natoms; j++ ) {
-			invmaps[i][j] = float4( -1.0, -1.0, -1.0, -1.0 );
-		}
-	}
-	
-	//This will hold the number of imaps actually used
-	*nimaps = -1;
-
-   printf( "gpuCalcInvMap: posflag=%d niatoms=%d nints=%d natoms=%d nmaps=%d\n",
-           posflag, niatoms, nints, natoms, nmaps );
-
-	//Now note down the positions where each atom occurs
-	for ( i = 0; i < nints; i++ ) {
-		//This is our atom
-		atom = atoms[ (niatoms + 1) * i + posflag + 1 ];
-
-		//Special for merged bondeds
-		if ( atom == -1 ) {
-			continue;
-		}
-
-		//Check to make sure we're inside the limits
-		if ( counts[atom] > nmaps * 4 ) {
-			printf( "Atom %d has too many proper dihedrals(%d, max %d)\n",
-			         atom, counts[atom], nmaps * 4 );
-			return 0;
-		}
-		
-		//Which invmap will this go into
-		mapnum = counts[atom] / 4;
-
-		if ( mapnum > *nimaps )
-			*nimaps = mapnum;
-
-		//Which component will it be
-		mapcomp = counts[atom] % 4;
-
-		//Set it
-		//This is silly, but otherwise I have to declare it as float*
-		//and things get even more confusing. :)
-		switch (mapcomp) {
-			case 0: invmaps[mapnum][atom].x = (float) i; break;
-			case 1: invmaps[mapnum][atom].y = (float) i; break;
-			case 2: invmaps[mapnum][atom].z = (float) i; break;
-			case 3: invmaps[mapnum][atom].w = (float) i; break;
-		}
-		
-		counts[atom]++;
-
-printf( "Atom %d count=%d max %d mapcomp=%d val=%d mapnum=%d\n", atom,  counts[atom],
-        nmaps * 4, mapcomp, i, mapnum );
-	}
-
-	(*nimaps)++;
-	return 1;	
-}
-
-
-void
-gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[], FILE* logFile )
-{
-	int i;
-	int j;
-	for ( i = 0; i < natoms; i++ ) {
-		fprintf( logFile, "%d %d ", i, counts[i] );
-		for ( j = 0; j < nmaps; j++ ) {
-			fprintf( logFile, "%6.0f %6.0f %6.0f %6.0f", invmap[j][i].x, invmap[j][i].y, 
-			         invmap[j][i].z, invmap[j][i].w );
-		}
-		fprintf( logFile, "\n");
-	}
-}
-
-/* We are still plagued by kernel call overheads. This is for a big fat
- * merged inverse gather kernel:
- * Since we have 32 bit floats, we have 23 bits of mantissa or the largest
- * integer we can represent is 2^23. So it should be quite safe to add 
- * 100000 * n to the index where n is the stream in which we should do the
- * lookup. This assumes that nints < 100000, preferably nints << 100000
- * which should always be true
- * */
-int
-gpuCalcInvMap_merged( 
-		int nints,    //number of interactions
-		int natoms,   //number of atoms
-		int *atoms,   //ijkl,ijkl,ijkl...
-		int nmaps,      //maximum number of inverse maps
-        int counts[],   //output counts of how many places each atom occurs
-		float4 *invmaps[], //output array of nmaps inverse maps
-		int *nimaps        //output max number of inverse maps actually used
-		)
-{
-	int i, j;
-	int atom;
-	int mapnum, mapcomp;
-	int pos;
-	
-	for ( i = 0; i < natoms; i++ )
-		counts[i] = 0;
-
-	for ( i = 0; i < nmaps; i++ ) {
-		for ( j = 0; j < natoms; j++ ) {
-			invmaps[i][j] = float4( -1.0, -1.0, -1.0, -1.0 );
-		}
-	}
-
-	//This will hold the number of imaps actually used
-	*nimaps = -1;
-
-	//For each atom
-	for ( i = 0; i < nints; i++ ) {
-		for ( j = 0; j < 4; j++ ) {
-			
-			atom = atoms[ i * 4 + j ];
-			
-			if ( atom == -1 ) {
-				//Nothing to be done for this atom, go to next
-				continue;
-			}
-			
-			//Which map
-			mapnum = counts[ atom ] / 4;
-			
-			//Make sure we have space
-			if ( mapnum >= nmaps ) {
-				printf( "Atom %d has too many bondeds(%d, max %d)\n",
-						 atom, counts[atom], nmaps * 4 );
-				return 0;
-			}
-				
-			if ( mapnum > *nimaps ) {
-				*nimaps = mapnum;
-			}
-
-			//Which component
-			mapcomp = counts[ atom ] % 4;
-			
-			//Encode target stream and position
-			pos = 100000 * j + i;
-
-			switch ( mapcomp ) {
-				case 0: invmaps[mapnum][atom].x = (float) pos; break;
-				case 1: invmaps[mapnum][atom].y = (float) pos; break;
-				case 2: invmaps[mapnum][atom].z = (float) pos; break;
-				case 3: invmaps[mapnum][atom].w = (float) pos; break;
-			}
-
-			counts[ atom ]++;
-
-		}
-	}
-	
-	(*nimaps)++;
-	return 1;
-}
-
-/* Repacks the invmap streams for more efficient access in the
- * merged inverse gather kernel
- *
- * buf should be nimaps * natoms large.
- * */
-int
-gpuRepackInvMap_merged( int natoms, int nmaps, int *counts, 
-		float4 *invmaps[], float4 *buf )
-{
-	int i, j;
-	int nmaps_i;
-
-	for ( i = 0; i < natoms; i++ ) {
-		for ( j = 0; j < nmaps; j++ ) {
-			buf[ i + j*natoms ] = float4( -1.0f, -1.0f, -1.0f, -1.0f );
-		}
-	}
-	
-	for ( i = 0; i < natoms; i++ ) {
-		
-		nmaps_i = counts[i] / 4;
-		if ( counts[i] % 4 ) 
-			nmaps_i++;
-		
-		for ( j = 0; j < nmaps_i; j++ ) {
-			buf[ i + j * natoms ] = invmaps[j][i];
-		}
-	}
-	return 1;
-}
--- a/platforms/brook/src/gpu/kObcBaseD2.br
+++ b/platforms/brook/src/gpu/kObcBaseD2.br
--- a/platforms/brook/src/gpu/kangles.br
+++ b/platforms/brook/src/gpu/kangles.br
-
-/****************************************************************
-* This file is part of the gpu acceleration library for gromacs.
-* Author: V. Vishal
-* Copyright (C) Pande Group, Stanford, 2006
-*****************************************************************/
-
-//Harmonic angles kernel
-//Input is a stream of triplets i, j, k
-//parms is float2( theta0, kA )
-//Output is three streams of forces fi, fj, fk
-//Again, this is kept simple for now, can be optimized 
-//later as necessary
-kernel void kangles_harmonic( 
-		float xstrwidth,
-		float3 atoms<>, 
-		float2 parms<>, 
-		float4 posq[][],
-		out float3 fi<>, 
-		out float3 fj<>,
-		out float3 fk<>
-		) {
-	float theta;
-	float dx, dx2, fs;
-
-	float st,  dvdt, cik, sth, nrkj2, nrij2, cii, ckk, costheta, sintheta;
-	float rij2, rkj2;
-	float2 idx;
-	float2 ai, aj, ak;
-
-	float3 xi, xj, xk, rij, rkj;
-	
-	ai.y = floor( atoms.x / xstrwidth );
-	ai.x = atoms.x - ai.y * xstrwidth;
-
-	aj.y = floor( atoms.y / xstrwidth );
-	aj.x = atoms.y - aj.y * xstrwidth;
-	
-	ak.y = floor( atoms.z / xstrwidth );
-	ak.x = atoms.z - ak.y * xstrwidth;
-	
-	rij = posq[ ai ].xyz - posq[ aj ].xyz; //3
-	rkj = posq[ ak ].xyz - posq[ aj ].xyz; //3
-
-	rij2 = dot( rij, rij ); //5
-	rkj2 = dot( rkj, rkj ); //5
-	costheta = dot( rij, rkj ) / sqrt( rij2 * rkj2 ); //8
-
-	costheta = clamp( costheta, -1.0, 1.0 );
-	theta = acos( costheta ); //1 flop, ouch
-	sintheta = sqrt( 1 - costheta * costheta ); //3
-
-	dx = theta - parms.x; //1
-	dx2 = dx * dx; //1
-	
-	/*scalar force = dv/dtheta*/
-	fs = -parms.y * dx; //1
-
-	st = fs / sintheta; //1
-	st = clamp( st, -1000000.0, 1000000.0 ); //Does this work on the gpu for st=inf?
-
-	sth = st * costheta;	//1
-	
-	nrkj2 = dot( rkj, rkj ); //5
-	nrij2 = dot( rij, rij ); //5
-
-	cik = st * rsqrt( nrkj2 * nrij2 ); //3
-	cii = sth / nrij2; //1
-	ckk = sth / nrkj2; //1
-
-	fi = -( cik * rkj - cii * rij ); //7
-	fk = -( cik * rij - ckk * rkj ); //7
-	fj = -fi - fk; //3
-
-	//Total flops: 64
-}
--- a/platforms/brook/src/gpu/kcommon.h
+++ b/platforms/brook/src/gpu/kcommon.h
+#ifndef __KCOMMON_H__
+#define __KCOMMON_H__
+
+// Host-callable entry points of the common Brook stream kernels.
+// NOTE(review): the kernel bodies are not defined in this header; the short
+// descriptions below are inferred from the kernel names -- confirm against
+// the corresponding .br source.
+
+// presumably extracts the xyz components of 'instr' into 'outstr'
+void  kgetxyz (::brook::stream instr,
+		::brook::stream outstr); 
+
+// presumably zero a float3 / float4 stream
+void  kzerof3 (::brook::stream outstr);
+void  kzerof4 (::brook::stream outstr); 
+
+// presumably sets every element of 'outstr' to 'val'
+void  ksetf4 (const float4  val, ::brook::stream outstr); 
+
+// NOTE(review): semantics of the following two kernels are not visible here
+void kadd3( ::brook::stream instr, ::brook::stream outstr );
+void ksetStr3( ::brook::stream instr, ::brook::stream outstr );
+
+#endif // __KCOMMON_H__
--- a/platforms/brook/src/gpu/kcommonOrig.br
+++ b/platforms/brook/src/gpu/kcommonOrig.br
-
-/****************************************************************
-* This file is part of the gpu acceleration library for gromacs.
-* Author: V. Vishal
-* Copyright (C) Pande Group, Stanford, 2006
-*****************************************************************/
-
-//Kernel to set the xyz components of a float4 stream
-//Used for changing the coordinates without changing
-//the charges in strPosQ
-kernel void ksetxyz( float3 instr<>, float4 before<>, out float4 after<> ) {
-	after.xyz = instr;
-	after.w   = before.w;
-}
-
-//Inverse of above
-kernel void kgetxyz( float4 instr<>, out float3 outstr<> ) {
-	outstr = instr.xyz;
-}
-
-
-//Zeroes out a stream
-kernel void kzerof3( out float3 outstr<> ) {
-	outstr = float3( 0.0, 0.0, 0.0 );
-}
-
-//Zeros out a stream
-kernel void kzerof4( out float4 outstr<> ) {
-	outstr = float4( 0.0, 0.0, 0.0, 0.0 );
-}
-
-kernel void ksetf4( float4 val, out float4 outstr<> ) {
-	outstr = val;
-}