Removed Brook platform

6ccf6169 · Peter Eastman · 82e0bd2f · 82e0bd2f · 82e0bd2f · 82e0bd2f
Commit 6ccf6169 authored Jan 12, 2010 by Peter Eastman
20 changed files
--- a/platforms/brook/src/BrookVerletDynamics.h
+++ b/platforms/brook/src/BrookVerletDynamics.h
-#ifndef OPENMM_BROOK_VERLET_DYNAMCIS_H_
-#define OPENMM_BROOK_VERLET_DYNAMCIS_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-#include <vector>
-#include <set>
-
-#include "BrookStreamImpl.h"
-#include "BrookShakeAlgorithm.h"
-#include "BrookPlatform.h"
-#include "BrookCommon.h"
-
-namespace OpenMM {
-
-/**
- *
- * Encapsulates stochastic dynamics algorithm 
- *
- */
-
-class BrookVerletDynamics : public BrookCommon {
-
-   public:
-  
-      /** 
-       * Constructor
-       * 
-       */
-      
-      BrookVerletDynamics(  );
-  
-      /** 
-       * Destructor
-       * 
-       */
-      
-      ~BrookVerletDynamics();
-  
-      /**
-       * Get step size
-       *
-       * @return step size
-       */
-
-      BrookOpenMMFloat getStepSize( void ) const; 
-
-      /**
-       *
-       * Get array of derived parameters indexed by 'DerivedParameters' enums
-       *
-       * @return array
-       *
-       */
-      
-      const BrookOpenMMFloat* getDerivedParameters( void ) const;
-      
-      /**
-       * Get VerletDynamics particle stream width
-       *
-       * @return particle stream width
-       */
-
-      int getVerletDynamicsParticleStreamWidth( void ) const; 
-
-      /**
-       * Get VerletDynamics particle stream height
-       *
-       * @return particle stream height
-       */
-
-      int getVerletDynamicsParticleStreamHeight( void ) const;
-
-      /**
-       * Get VerletDynamics particle stream size
-       * 
-       * @return particle stream size
-       */
-
-      int getVerletDynamicsParticleStreamSize( void ) const; 
-
-      /** 
-       * Update parameters
-       * 
-       * @param  step size       step size
-       *
-       * @return   DefaultReturnValue
-       *
-       */
-      
-      int updateParameters( double stepSize );
-      
-      /** 
-       * Update
-       * 
-       * @param  positions                   particle positions
-       * @param  velocities                  particle velocities
-       * @param  forces                      particle forces
-       * @param  brookShakeAlgorithm         BrookShakeAlgorithm reference
-       *
-       * @return  DefaultReturnValue
-       *
-       */
-      
-      int update( BrookStreamImpl& positions, BrookStreamImpl& velocities,
-                  const BrookStreamImpl& forces, BrookShakeAlgorithm& brookShakeAlgorithm );
-
-      /** 
-       * Get array of VerletDynamics streams 
-       *
-       * @return  array ofstreams
-       *
-       */
-      
-      BrookFloatStreamInternal** getStreams( void );
-      
-      /* 
-       * Setup of VerletDynamics parameters
-       *
-       * @param masses                particle masses
-       * @param platform              Brook platform
-       *
-       * @return ErrorReturnValue value if error, else DefaultReturnValue
-       *
-       * */
-      
-      int setup( const std::vector<double>& masses, const Platform& platform  );
-      
-      /* 
-       * Get contents of object
-       *
-       * @param level of dump
-       *
-       * @return string containing contents
-       *
-       * */
-      
-      std::string getContentsString( int level = 0 ) const;
-
-      /** 
-       * Get V-prime stream 
-       *
-       * @return  V-prime stream
-       *
-       */
-      
-      BrookFloatStreamInternal* getVPrimeStream( void ) const;
-
-      /** 
-       * Get X-prime stream 
-       *
-       * @return  X-prime stream
-       *
-       */
-      
-      BrookFloatStreamInternal* getXPrimeStream( void ) const;
-
-      /** 
-       * Get inverse sqrt masses 
-       * 
-       * @return   inverse sqrt masses stream
-       *
-       */
-      
-      BrookFloatStreamInternal* getInverseMassStream( void ) const;
-
-   private:
-   
-      // streams indices
-
-      enum BrookVerletDynamicsStreams { 
-              VPrimeStream,
-              XPrimeStream,
-              InverseMassStream,
-              LastStreamIndex
-           };
-
-      // track step (diagnostics)
-
-      int _internalStepCount;
-
-      BrookOpenMMFloat _stepSize;
-
-      // Particle stream dimensions
-
-      int _verletParticleStreamWidth;
-      int _verletParticleStreamHeight;
-      int _verletParticleStreamSize;
-
-      /*
-       * Update streams
-       * 
-       * @return  DefaultReturn
-       *
-       */
-      
-      int _updateVerletStreams( void );
-      
-      // inverse masses
-
-      BrookOpenMMFloat* _inverseMasses;
-
-      // internal streams
-
-      BrookFloatStreamInternal* _verletStreams[LastStreamIndex];
-
-      /** 
-       * Set stepSize
-       * 
-       * @param   stepSize
-       *
-       * @return      DefaultReturn
-       *
-       */
-      
-      int _setStepSize( BrookOpenMMFloat stepSize );
-      
-      /* 
-       * Setup of stream dimensions
-       *
-       * @param particleStreamSize        particle stream size
-       * @param particleStreamWidth       particle stream width
-       *
-       * @return ErrorReturnValue if error, else DefaultReturnValue
-       *
-       * */
-      
-      int _initializeStreamSizes( int particleStreamSize, int particleStreamWidth );
-
-      /** 
-       * Initialize stream dimensions
-       * 
-       * @param numberOfParticles         number of particles
-       * @param platform                  platform
-       *
-       * @return ErrorReturnValue if error, else DefaultReturnValue
-       *
-       */
-      
-      int _initializeStreamSizes( int numberOfParticles, const Platform& platform );
-      
-      /** 
-       * Initialize stream dimensions and streams
-       * 
-       * @param platform                  platform
-       *
-       * @return nonzero value if error
-       *
-       */
-      
-      int _initializeStreams( const Platform& platform );
-
-      /** 
-       * Set masses 
-       * 
-       * @param masses             particle masses
-       *
-       */
-      
-      int _setInverseMasses( const std::vector<double>& masses );
-      
-};
-
-} // namespace OpenMM
-
-#endif /* OPENMM_BROOK_VERLET_DYNAMCIS_H_ */
--- a/platforms/brook/src/OpenMMBrookInterface.cpp
+++ b/platforms/brook/src/OpenMMBrookInterface.cpp
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-#include "openmm/LangevinIntegrator.h"
-#include "ReferencePlatform.h"
-#include "openmm/internal/ContextImpl.h"
-#include "openmm/OpenMMException.h"
-
-#include "BrookStreamImpl.h"
-#include "OpenMMBrookInterface.h"
-#include "kernels/kcommon.h"
-
-#include <cmath>
-#include <limits>
-#include <sstream>
-
-using namespace OpenMM;
-using namespace std;
-
-/** 
- * OpenMMBrookInterface constructor
- * 
- */
-
-OpenMMBrookInterface::OpenMMBrookInterface( int streamWidth, int duplicationFactor ) : _particleStreamWidth(streamWidth){
-
-// ---------------------------------------------------------------------------------------
-
-   // static const std::string methodName      = "OpenMMBrookInterface::OpenMMBrookInterface";
-
-// ---------------------------------------------------------------------------------------
-
-   _numberOfParticles                       = 0;
-
-   _triggerForceKernel                      = NULL;
-   _triggerEnergyKernel                     = NULL;
-
-   _positions                               = NULL;
-   _velocities                              = NULL;
-   _forces                                  = NULL;
-
-   _log                                     = NULL;
-//_log = stderr;
-   _time                                    = 0.0;
-
-   _particleStreamSize                      = -1;
-
-   for( int ii = 0; ii < LastBondForce; ii++ ){
-      _bondParameters[ii] = NULL;
-   }
-
-   if( duplicationFactor < 1 ){
-      duplicationFactor = 4;
-   }
-   _brookNonBonded.setDuplicationFactor( duplicationFactor );
-   _brookGbsa.setDuplicationFactor( duplicationFactor );
-}   
-
-/** 
- * OpenMMBrookInterface destructor
- * 
- */
-
-OpenMMBrookInterface::~OpenMMBrookInterface( ){
-
-// ---------------------------------------------------------------------------------------
-
-   // static const std::string methodName      = "OpenMMBrookInterface::OpenMMBrookInterface";
-
-// ---------------------------------------------------------------------------------------
-
-   for( int ii = 0; ii < LastBondForce; ii++ ){
-      delete _bondParameters[ii];
-   }
-
-}
-
-/** 
- * Get number of particles
- * 
- * @return   number of particles
- *
- */
-
-int OpenMMBrookInterface::getNumberOfParticles( void ) const {
-   return _numberOfParticles;
-}
-
-/** 
- * Set number of particles
- * 
- * @param numberOfParticles number of particles
- *
- */
-
-int OpenMMBrookInterface::setNumberOfParticles( int numberOfParticles ){
-   _numberOfParticles = numberOfParticles;
-   return BrookCommon::DefaultReturnValue;
-}
-
-/**
-* Get the current time
-*
-* @return   the current time
-*
-*/
-
-double OpenMMBrookInterface::getTime( void ) const {
-    return _time;
-}
-
-/**
-* Set the current time
-*
-* @param time the current time
-*
-* @return DefaultReturnValue
-*
-*/
-
-int OpenMMBrookInterface::setTime( double time) {
-    _time = time;
-    return BrookCommon::DefaultReturnValue;
-}
-
-/** 
- * Get particle stream width
- * 
- * @return    particle stream width
- *
- */
-
-int OpenMMBrookInterface::getParticleStreamWidth( void ) const {
-   return _particleStreamWidth;
-}
-
-/** 
- * Get particle stream size
- * 
- * @return    particle stream size
- *
- */
-
-int OpenMMBrookInterface::getParticleStreamSize( void ) const {
-   if( _particleStreamSize < 0 && _numberOfParticles > 0 ){
-       OpenMMBrookInterface* localThis = const_cast<OpenMMBrookInterface* const>(this);
-      localThis->_particleStreamSize = BrookPlatform::getStreamSize( _numberOfParticles, _particleStreamWidth, NULL );
-   }
-   return _particleStreamSize;
-}
-
-/** 
- * Get log file reference
- * 
- * @return  log file reference
- *
- */
-
-FILE* OpenMMBrookInterface::getLog( void ) const {
-   return _log;
-}
-
-/** 
- * Set log file reference
- * 
- * @param  log file reference
- *
- * @return  DefaultReturnValue
- *
- */
-
-int OpenMMBrookInterface::setLog( FILE* log ){
-   _log          = log;
-   _brookBonded.setLog( log );
-   _brookNonBonded.setLog( log );
-   _brookGbsa.setLog( log );
-   return BrookCommon::DefaultReturnValue;
-}
-
-/** 
- * Get BrookBondParameters at specified index
- * 
- * @param   index
- *
- * @return  BrookBondParameters* object
- *
- */
-
-BrookBondParameters* OpenMMBrookInterface::_getBondParameters( BondParameterIndices index ) const {
-   return _bondParameters[index];
-}
-
-/** 
- * Set BrookBondParameters at specified index
- * 
- * @param index
- * @param brookBondParameters brookBondParameters for BrookBondParameters
- *
- * @return  DefaultReturnValue
- *
- */
-
-int OpenMMBrookInterface::_setBondParameters( BondParameterIndices index, BrookBondParameters* brookBondParameters ){
-
-   if( brookBondParameters && brookBondParameters->getNumberOfBonds() > 0 ){
-      _brookBonded.setIsActive( 1 );
-   }
-
-   _bondParameters[index] = brookBondParameters;
-
-   if( !brookBondParameters || brookBondParameters->getNumberOfBonds() < 1 ){
-      int isActive = 0;
-      for( int ii = 0; ii < LastBondForce && !isActive; ii++ ){
-         isActive = (_bondParameters[ii] != NULL && brookBondParameters->getNumberOfBonds() > 0) ? 1 : 0;
-      }
-       _brookBonded.setIsActive( isActive );
-   }
-   return BrookCommon::DefaultReturnValue;
-}
-
-/** 
- * Get BrookBondParameters for harmonic bond force
- * 
- * @return brookBondParameters for harmonic bond force
- *
- */
-
-BrookBondParameters* OpenMMBrookInterface::getHarmonicBondForceParameters( void ) const {
-   return _getBondParameters( HarmonicBondIndex );
-}
-
-/** 
- * Set BrookBondParameters for harmonic bond force
- * 
- * @param brookBondParameters brookBondParameters for harmonic bond force
- *
- * @return  DefaultReturnValue
- *
- */
-
-int OpenMMBrookInterface::setHarmonicBondForceParameters( BrookBondParameters* brookBondParameters ){
-   _brookBonded.setupCompleted( 0 );
-   return _setBondParameters( HarmonicBondIndex, brookBondParameters );
-}
-
-/** 
- * Get BrookBondParameters for harmonic angle force
- * 
- * @return brookBondParameters for harmonic angle force
- *
- */
-
-BrookBondParameters* OpenMMBrookInterface::getHarmonicAngleForceParameters( void ) const {
-   return _getBondParameters( HarmonicAngleIndex );
-}
-
-/** 
- * Set BrookBondParameters for harmonic angle force
- * 
- * @param brookBondParameters brookBondParameters for harmonic angle force
- *
- * @return  DefaultReturnValue
- *
- */
-
-int OpenMMBrookInterface::setHarmonicAngleForceParameters( BrookBondParameters* brookBondParameters ){
-   _brookBonded.setupCompleted( 0 );
-   return _setBondParameters( HarmonicAngleIndex, brookBondParameters );
-}
-
-/** 
- * Get BrookBondParameters for periodic torsion force
- * 
- * @return brookBondParameters for periodic torsion force
- *
- */
-
-BrookBondParameters* OpenMMBrookInterface::getPeriodicTorsionForceParameters( void ) const {
-   return _getBondParameters( PeriodicTorsionForceIndex );
-}
-
-/** 
- * Set BrookBondParameters for periodic torsion force
- * 
- * @param brookBondParameters brookBondParameters for periodic torsion force
- *
- * @return  DefaultReturnValue
- *
- */
-
-int OpenMMBrookInterface::setPeriodicTorsionForceParameters( BrookBondParameters* brookBondParameters ){
-   _brookBonded.setupCompleted( 0 );
-   return _setBondParameters( PeriodicTorsionForceIndex, brookBondParameters );
-}
-
-/** 
- * Get BrookBondParameters for rb torsion force
- * 
- * @return brookBondParameters for rb torsion force
- *
- */
-
-BrookBondParameters* OpenMMBrookInterface::getRBTorsionForceParameters( void ) const {
-   return _getBondParameters( RbTorsionForceIndex );
-}
-
-/** 
- * Set BrookBondParameters for RB torsion force
- * 
- * @param brookBondParameters brookBondParameters for RB torsion force
- *
- * @return  DefaultReturnValue
- *
- */
-
-int OpenMMBrookInterface::setRBTorsionForceParameters( BrookBondParameters* brookBondParameters ){
-   _brookBonded.setupCompleted( 0 );
-   return _setBondParameters( RbTorsionForceIndex, brookBondParameters );
-}
-
-/** 
- * Get BrookBondParameters for LJ 14 force
- * 
- * @return brookBondParameters for LJ 14 force
- *
- */
-
-BrookBondParameters* OpenMMBrookInterface::getNonBonded14ForceParameters( void ) const {
-   return _getBondParameters( LJ14Index );
-}
-
-/** 
- * Set BrookBondParameters for LJ 14 force
- * 
- * @param brookBondParameters brookBondParameters for LJ 14 force
- *
- * @return  DefaultReturnValue
- *
- */
-
-int OpenMMBrookInterface::setNonBonded14ForceParameters( BrookBondParameters* brookBondParameters ){
-   _brookBonded.setupCompleted( 0 );
-   return _setBondParameters( LJ14Index, brookBondParameters );
-}
-
-/** 
- * Get positions stream
- * 
- * @return particle positions 
- *
- */
-    
-BrookStreamImpl* OpenMMBrookInterface::getParticlePositions( void ){
-   return _positions;
-}
-
-/** 
- * Set positions stream
- * 
- * @param positions Brook stream containing particle positions
- *
- * @return  DefaultReturnValue
- *
- */
-    
-int OpenMMBrookInterface::setParticlePositions( BrookStreamImpl* positions ){
-   _positions = positions;
-   return DefaultReturnValue;
-}
-
-/** 
- * Get velocities stream
- * 
- * @return particle velocities
- *
- */
-    
-BrookStreamImpl* OpenMMBrookInterface::getParticleVelocities( void ){
-   return _velocities;
-}
-
-/** 
- * Set velocities stream
- * 
- * @param velocities Brook stream containing particle velocities
- *
- * @return  DefaultReturnValue
- *
- */
-    
-int OpenMMBrookInterface::setParticleVelocities( BrookStreamImpl* velocities ){
-   _velocities = velocities;
-   return DefaultReturnValue;
-}
-
-/** 
- * Get forces stream
- * 
- * @return ParticleForces
- *
- */
-    
-BrookStreamImpl* OpenMMBrookInterface::getParticleForces( void ){
-   return _forces;
-}
-
-/** 
- * Set forces stream
- * 
- * @param forces Brook stream containing particle forces
- *
- * @return  DefaultReturnValue
- *
- */
-    
-int OpenMMBrookInterface::setParticleForces( BrookStreamImpl* forces ){
-   _forces = forces;
-   return DefaultReturnValue;
-}
-
-/** 
- * Set trigger Force Kernel
- *
- * @param triggerForceKernel kernel to calculate force
- *
- */
-    
-void OpenMMBrookInterface::setTriggerForceKernel( void* triggerForceKernel ){
-   _triggerForceKernel = triggerForceKernel;
-}
-    
-/** 
- * Get trigger Force Kernel
- *
- * @return triggerForceKernel kernel to calculate force
- *
- */
-    
-void* OpenMMBrookInterface::getTriggerForceKernel( void ) const {
-   return _triggerForceKernel;
-}
-
-/** 
- * Set trigger Energy Kernel
- *
- * @param triggerEnergyKernel kernel to calculate force
- *
- */
-    
-void OpenMMBrookInterface::setTriggerEnergyKernel( void* triggerEnergyKernel ){
-   _triggerEnergyKernel = triggerEnergyKernel;
-}
-    
-/** 
- * Get trigger Energy Kernel
- *
- * @return triggerEnergyKernel kernel to calculate force
- *
- */
-    
-void* OpenMMBrookInterface::getTriggerEnergyKernel( void ) const {
-   return _triggerEnergyKernel;
-}
-
-/** 
- * Get Brook non bonded
- *
- * @return BrookNonBonded reference
- *
- */
-    
-BrookNonBonded& OpenMMBrookInterface::getBrookNonBonded( void ){
-   return _brookNonBonded;
-}
-
-/** 
- * Get Brook GBSA
- *
- * @return BrookGbsa reference
- *
- */
-    
-BrookGbsa& OpenMMBrookInterface::getBrookGbsa( void ){
-   return _brookGbsa;
-}
-
-/** 
- * Zero forces
- * 
- * @param context     context
- *
- */
-
-void OpenMMBrookInterface::zeroForces( ContextImpl& context ){
-
-// ---------------------------------------------------------------------------------------
-
-   //static const std::string methodName      = "OpenMMBrookInterface::zeroForces";
-
-// ---------------------------------------------------------------------------------------
-
-   // zero forces
-
-   BrookStreamImpl* forces  = getParticleForces();
-   kzerof3( forces->getBrookStream() );
-
-   // ---------------------------------------------------------------------------------------
-}
-
-/** 
- * Compute forces
- * 
- * @param context     context
- *
- */
-
-void OpenMMBrookInterface::computeForces( ContextImpl& context ){
-
-// ---------------------------------------------------------------------------------------
-
-   static const std::string methodName      = "OpenMMBrookInterface::executeForces";
-
-   static       int printOn                 = 0;
-   static const int MaxErrorMessages        = 2;
-   static       int ErrorMessages           = 0;
-   FILE* log;
-
-// ---------------------------------------------------------------------------------------
-
-//setLog( stderr );
-
-   if( printOn && getLog() ){
-      log     = getLog();
-   } else {
-      printOn = 0;
-   }
-
-   BrookStreamImpl* positions = getParticlePositions();
-   BrookStreamImpl* forces    = getParticleForces();
-
-   // info
-
-   if( printOn > 0 ){
-   //if( 1 ){
-      printForcesToFile( context );
-   }
-
-   // nonbonded forces
-
-   if( _brookNonBonded.isActive() ){
-      _brookNonBonded.computeForces( *positions, *forces );
-   }
-
-// ---------------------------------------------------------------------------------------
-
-   // bonded forces
-
-   if( printOn ){
-      (void) fprintf( log, "%s done nonbonded: bonded=%d completed=%d\n", methodName.c_str(),
-                      _brookBonded.isActive(), _brookBonded.isSetupCompleted() ); 
-      (void) fflush( log );
-   }
-
-   if( _brookBonded.isActive() ){
-
-      // perform setup first time through
-
-      if( _brookBonded.isSetupCompleted() == 0 ){
-         _brookBonded.setup( getNumberOfParticles(), getHarmonicBondForceParameters(), getHarmonicAngleForceParameters(),
-                             getPeriodicTorsionForceParameters(), getRBTorsionForceParameters(),
-                             getNonBonded14ForceParameters(),
-                             getParticleStreamWidth(), getParticleStreamSize() );
-      }
-
-      _brookBonded.computeForces( *positions, *forces );
-   
-      // diagnostics
-   
-      if( printOn ){
-         static int step           = 0;
-         static const int stopStep = 10;
-         (void) fprintf( log, "%s done bonded computeForces\n", methodName.c_str() );
-         (void) fflush( log );
-   
-   /*
-         (void) fprintf( getLog(), "\nFinal NB & bonded forces" );
-         BrookStreamInternal* brookStreamInternalF   = forceStream.getBrookStreamInternal();
-         brookStreamInternalF->printToFile( getLog() );
-         void* dataV = brookStreamInternalF->getData(1);
-         float* data = (float*) dataV;
-         (void) fprintf( getLog(), "\nFinal NB & bonded forces RAW\n" );
-         for( int ii = 0; ii < _brookNonBonded->getNumberOfParticles()*3; ii += 3 ){
-            (void) fprintf( getLog(), "%d [%.6e %.6e %.6e]\n", ii, data[ii], data[ii+1], data[ii+2] );
-         }
-   */
-   
-         if( step++ >= stopStep ){
-            exit(0);
-         }
-      }
-
-   }
-
-   // GBSA OBC forces
-
-   if( _brookGbsa.isActive() ){
-      _brookGbsa.computeForces( *positions, *forces );
-   }
-
-   // ---------------------------------------------------------------------------------------
-}
-
-/** 
- * Print forces to file 
- * 
- * @param context     context
- *
- */
-
-void OpenMMBrookInterface::printForcesToFile( ContextImpl& context ){
-
-// ---------------------------------------------------------------------------------------
-
-   static const std::string methodName      = "OpenMMBrookInterface::printForcesToFile";
-   float zero                               = 0.0f;
-   static int step                          = 0;
-
-// ---------------------------------------------------------------------------------------
-
-   // first step only?
-
-   if( step > 1 ){
-      return;
-   }
-
-   std::stringstream fileNameBaseS;
-   //fileNameBase << "Brook_" << context.getTime() << "_"; 
-   fileNameBaseS << "Brook_" << step++ << "_"; 
-   std::string fileNameBase = fileNameBaseS.str();
-
-   // create vector of streams for output
-
-   BrookStreamImpl* positions = getParticlePositions();
-   BrookStreamImpl* forces    = getParticleForces();
-
-   std::vector<BrookStreamInternal*> streams;
-   streams.push_back( positions->getBrookStreamInternal() );
-   streams.push_back( forces->getBrookStreamInternal() );
-
-// ---------------------------------------------------------------------------------------
-
-   // nonbonded forces
-
-   if( _brookNonBonded.isActive() ){
-      forces->fillWithValue( &zero );
-      _brookNonBonded.computeForces( *positions, *forces );
-      std::string fileName = fileNameBase + "NonBonded.txt";
-      BrookStreamInternal::printStreamsToFile( fileName, streams );
-   }
-
-// ---------------------------------------------------------------------------------------
-
-   // bonded forces
-
-   if( _brookBonded.isActive() ){
-
-      // perform setup first time through
-
-      if( _brookBonded.isSetupCompleted() == 0 ){
-
-         _brookBonded.setup( getNumberOfParticles(), getHarmonicBondForceParameters(), getHarmonicAngleForceParameters(),
-                             getPeriodicTorsionForceParameters(), getRBTorsionForceParameters(),
-                             getNonBonded14ForceParameters(),
-                             getParticleStreamWidth(), getParticleStreamSize() );
-      }
-
-      forces->fillWithValue( &zero );
-      _brookBonded.computeForces( *positions, *forces );
-      std::string fileName = fileNameBase + "Bonded.txt";
-      BrookStreamInternal::printStreamsToFile( fileName, streams );
-   
-   }
-
-// ---------------------------------------------------------------------------------------
-
-   // GBSA OBC forces
-
-   if( _brookGbsa.isActive() ){
-
-      forces->fillWithValue( &zero );
-      _brookGbsa.computeForces( *positions, *forces );
-
-      std::string fileName = fileNameBase + "Obc.txt";
-      BrookStreamInternal::printStreamsToFile( fileName, streams );
-
-   }
-
-// ---------------------------------------------------------------------------------------
-
-   // all forces
-
-   if( 1 ){
-
-      forces->fillWithValue( &zero );
-
-      if( _brookNonBonded.isActive() ){
-         _brookNonBonded.computeForces( *positions, *forces );
-      }
-
-      if( _brookBonded.isActive() ){
-         _brookBonded.computeForces( *positions, *forces );
-      }
-
-      if( _brookGbsa.isActive() ){
-         _brookGbsa.computeForces( *positions, *forces );
-      }
-
-      std::string fileName = fileNameBase + "AllF.txt";
-      BrookStreamInternal::printStreamsToFile( fileName, streams );
-   }
-
-   forces->fillWithValue( &zero );
-
-   // ---------------------------------------------------------------------------------------
-}
-
-/** 
- * Compute energy
- * 
- * @param context     context
- * @param system      system reference
- *
- */
-
-double OpenMMBrookInterface::computeEnergy( ContextImpl& context, System& system ){
-
-// ---------------------------------------------------------------------------------------
-
-   //static const std::string methodName      = "OpenMMBrookInterface::computeEnergy";
-
-// ---------------------------------------------------------------------------------------
-
-   // We don't currently have GPU kernels to calculate energy, so instead we have the reference
-   // platform do it.  This is VERY slow.
-    
-   LangevinIntegrator integrator(0.0, 1.0, 0.0);
-   ReferencePlatform platform;
-   Context refContext( system, integrator, platform );
-
-   const Stream& positions = context.getPositions();
-   double* posData         = new double[positions.getSize()*3];
-   positions.saveToArray(posData);
-   vector<Vec3> pos(positions.getSize());
-
-   for( unsigned int ii = 0; ii < pos.size(); ii++ ){
-      pos[ii] = Vec3(posData[3*ii], posData[3*ii+1], posData[3*ii+2]);
-   }
-   delete[] posData;
-   refContext.setPositions(pos);
-
-   return refContext.getState(State::Energy).getPotentialEnergy();
-
-}
--- a/platforms/brook/src/OpenMMBrookInterface.h
+++ b/platforms/brook/src/OpenMMBrookInterface.h
-#ifndef OPENMM_BROOK_INTERFACE_H_
-#define OPENMM_BROOK_INTERFACE_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-#include "openmm/kernels.h"
-#include "../../reference/src/SimTKUtilities/SimTKOpenMMRealType.h"
-#include "BrookBondParameters.h"
-#include "BrookBonded.h"
-#include "BrookNonBonded.h"
-#include "BrookGbsa.h"
-#include "openmm/NonbondedForce.h"
-#include "openmm/Context.h"
-#include "openmm/System.h"
-
-namespace OpenMM {
-
-/**
- * OpenMM-Brook interface methods
- */
-class OpenMMBrookInterface {
-
-   public:
-  
-      OpenMMBrookInterface( int streamWidth, int duplicationFactor );
-  
-      ~OpenMMBrookInterface();
-  
-      /**
-       * Get reference Context
-       * 
-       * @param numberOfParticles  number of particles
-       *
-       * @return  Context
-       *
-       */
-      
-      Context* getReferenceContext( int numberOfParticles );
-      
-      /** 
-       * Set log file reference
-       * 
-       * @param  log file reference
-       *
-       * @return DefaultReturnValue
-       *
-       */
-      
-      int setLog( FILE* log );
-
-      /* 
-       * Get contents of object
-       *
-       * @param level of dump
-       *
-       * @return string containing contents
-       *
-       * */
-      
-      std::string getContents( int level ) const;
-
-      /** 
-       * Get number of particles
-       * 
-       * @return   number of particles
-       *
-       */
-      
-      int getNumberOfParticles( void ) const;
-      
-      /** 
-       * Set number of particles
-       * 
-       * @param numberOfParticles number of particles
-       *
-       * @return DefaultReturnValue
-       *
-       */
-      
-      int setNumberOfParticles( int numberOfParticles );
-
-      /**
-       * Get the current time
-       *
-       * @return   the current time
-       *
-       */
-
-      double getTime( void ) const;
-
-      /**
-       * Set the current time
-       *
-       * @param time the current time
-       *
-       * @return DefaultReturnValue
-       *
-       */
-
-      int setTime( double time);
-      
-      /** 
-       * Get particle stream width
-       * 
-       * @return    particle stream width
-       *
-       */
-      
-      int getParticleStreamWidth( void ) const;
-      
-      /** 
-       * Get particle stream size
-       * 
-       * @return    particle stream size
-       *
-       */
-      
-      int getParticleStreamSize( void ) const;
-      
-      /** 
-       * Get log file reference
-       * 
-       * @return  log file reference
-       *
-       */
-      
-      FILE* getLog( void ) const;
-      
-      /** 
-       * Zero forces
-       *
-       * @param context ContextImpl context 
-       *
-       */
-      
-      void zeroForces( ContextImpl& context );
-      
-      /** 
-       * Compute forces
-       *
-       * @param context ContextImpl context 
-       *
-       */
-      
-      void computeForces( ContextImpl& context );
-      
-      /** 
-       * Compute energy
-       *
-       * @param context ContextImpl context 
-       * @param system  system reference
-       *
-       * @return potential energy
-       *
-       */
-      
-      double computeEnergy( ContextImpl& context, System& system );
-      
-      /** 
-       * Set trigger Force Kernel
-       *
-       * @param triggerForceKernel kernel to calculate force
-       *
-       */
-      
-      void setTriggerForceKernel( void* triggerForceKernel );
-      
-      /** 
-       * Get trigger Force Kernel
-       *
-       * @return triggerForceKernel kernel to calculate force
-       *
-       */
-      
-      void* getTriggerForceKernel( void ) const;
-      
-      /** 
-       * Set trigger Energy Kernel
-       *
-       * @param triggerEnergyKernel kernel to calculate force
-       *
-       */
-      
-      void setTriggerEnergyKernel( void* triggerEnergyKernel );
-      
-      /** 
-       * Get trigger Energy Kernel
-       *
-       * @return triggerEnergyKernel kernel to calculate force
-       *
-       */
-      
-      void* getTriggerEnergyKernel( void ) const;
-      
-      /** 
-       * Get BrookNonBonded reference
-       * 
-       * @return BrookNonBonded reference
-       *
-       */
-      
-      BrookNonBonded& getBrookNonBonded( void );
-      
-      /** 
-       * Get BrookGbsa reference
-       * 
-       * @return BrookGbsa reference
-       *
-       */
-      
-      BrookGbsa& getBrookGbsa( void );
-      
-      /** 
-       * Get BrookBondParameters for harmonic bond force
-       * 
-       * @return brookBondParameters for BrookBondParameters for harmonic bond force
-       *
-       */
-      
-      BrookBondParameters* getHarmonicBondForceParameters( void ) const;
-      
-      /** 
-       * Set BrookBondParameters for harmonic bond force
-       * 
-       * @param brookBondParameters BrookBondParameters for harmonic bond force
-       *
-       * @return DefaultReturnValue
-       *
-       */
-      
-      int setHarmonicBondForceParameters( BrookBondParameters* brookBondParameters );
-      
-      /** 
-       * Get BrookBondParameters for harmonic angle force
-       * 
-       * @return brookBondParameters for BrookBondParameters for harmonic angle force
-       *
-       */
-      
-      BrookBondParameters* getHarmonicAngleForceParameters( void ) const;
-      
-      /** 
-       * Set BrookBondParameters for harmonic angle force
-       * 
-       * @param brookBondParameters brookBondParameters for BrookBondParameters for harmonic angle force
-       *
-       * @return DefaultReturnValue
-       *
-       */
-      
-      int setHarmonicAngleForceParameters( BrookBondParameters* brookBondParameters );
-      
-      /** 
-       * Get BrookBondParameters for periodic torsion force
-       * 
-       * @return brookBondParameters for BrookBondParameters for periodic torsion force
-       *
-       */
-      
-      BrookBondParameters* getPeriodicTorsionForceParameters( void ) const;
-
-      /** 
-       * Set BrookBondParameters for periodic torsion force
-       * 
-       * @param brookBondParameters for periodic torsion force
-       *
-       * @return  DefaultReturnValue
-       *
-       */
-      
-      int setPeriodicTorsionForceParameters( BrookBondParameters* brookBondParameters );
-      
-      /** 
-       * Get BrookBondParameters for RB torsion force
-       * 
-       * @return brookBondParameters for BrookBondParameters for RB torsion force
-       *
-       */
-      
-      BrookBondParameters* getRBTorsionForceParameters( void ) const;
-
-      /** 
-       * Set BrookBondParameters for RB torsion force
-       * 
-       * @param brookBondParameters brookBondParameters for RB torsion force
-       *
-       * @return  DefaultReturnValue
-       *
-       */
-      
-      int setRBTorsionForceParameters( BrookBondParameters* brookBondParameters );
-      
-      /** 
-       * Get BrookBondParameters for LJ 14 forces
-       * 
-       * @return brookBondParameters for BrookBondParameters for LJ 14 forces
-       *
-       */
-      
-      BrookBondParameters* getNonBonded14ForceParameters( void ) const;
-
-      /** 
-       * Set BrookBondParameters for LJ 14 force
-       * 
-       * @param brookBondParameters brookBondParameters for LJ 14 force
-       *
-       * @return  DefaultReturnValue
-       *
-       */
-      
-      int setNonBonded14ForceParameters( BrookBondParameters* brookBondParameters );
-      
-      /** 
-       * Get positions stream
-       * 
-       * @return particle positions 
-       *
-       */
-         
-      BrookStreamImpl* getParticlePositions( void );
-      
-      /** 
-       * Set positions stream
-       * 
-       * @param positions Brook stream containing particle positions
-       *
-       * @return  DefaultReturnValue
-       *
-       */
-      
-      int setParticlePositions( BrookStreamImpl* positions );
-      
-      /** 
-       * Get velocities stream
-       * 
-       * @return particle velocities
-       *
-       */
-             
-      BrookStreamImpl* getParticleVelocities( void );
-       
-      /** 
-       * Set velocities stream
-       * 
-       * @param velocities Brook stream containing particle velocities
-       *
-       * @return  DefaultReturnValue
-       *
-       */
-      
-      int setParticleVelocities( BrookStreamImpl* velocities );
-      
-      /** 
-       * Get forces stream
-       * 
-       * @return ParticleForces
-       *
-       */
-             
-      BrookStreamImpl* getParticleForces( void );
-
-      /** 
-       * Set forces stream
-       * 
-       * @param forces Brook stream containing particle forces
-       *
-       * @return  DefaultReturnValue
-       *
-       */
-      
-      int setParticleForces( BrookStreamImpl* forces );
-
-      /** 
-       * Print forces to file 
-       * 
-       * @param context     context
-       *
-       */
-      
-      void printForcesToFile( ContextImpl& context );
-      
-   private:
-   
-      static const int DefaultReturnValue = 0;
-      static const int ErrorReturnValue   = -1;
-
-      enum BondParameterIndices { HarmonicBondIndex, HarmonicAngleIndex, PeriodicTorsionForceIndex, RbTorsionForceIndex, LJ14Index, LastBondForce };
-
-      // log file reference
-
-      FILE* _log;
-
-      int _particleStreamWidth;
-      int _particleStreamSize;
-
-       // number of particles
-
-       int _numberOfParticles;
-   
-       // Brook bonded, nonbonded, Gbsa
-
-       BrookBonded     _brookBonded;
-       BrookNonBonded  _brookNonBonded;
-       BrookGbsa       _brookGbsa;
-
-       void* _triggerForceKernel;
-       void* _triggerEnergyKernel;
-
-       BrookBondParameters* _bondParameters[LastBondForce];
-
-       // context-related fields
-
-       BrookStreamImpl* _positions;
-       BrookStreamImpl* _velocities;
-       BrookStreamImpl* _forces;
-       double _time;
-
-      /** 
-       * Set BrookBondParameters 
-       * 
-       * @param index
-       * @param brookBondParameters brookBondParameters for BrookBondParameters
-       *
-       * @return DefaultReturnValue
-       *
-       */
-      
-      int _setBondParameters( BondParameterIndices index, BrookBondParameters* brookBondParameters );
-      
-      /** 
-       * Get BrookBondParameters at specified index
-       * 
-       * @param   index
-       *
-       * @return  BrookBondParameters* object
-       *
-       */
-      
-      BrookBondParameters* _getBondParameters( BondParameterIndices index ) const;
-      
-};
-
-} // namespace OpenMM
-
-#endif /* OPENMM_BROOK_INTERFACE_H_ */
--- a/platforms/brook/src/kernels/invmap.h
+++ b/platforms/brook/src/kernels/invmap.h
-#ifndef __INVMAP_H__
-#define __INVMAP_H__
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-/*
- * For each atom, calculates the positions at which it's
- * forces are to be picked up from and stores the position
- * in the appropriate index.
- *
- * Input: number of dihedrals, the atom indices, and a flag indicating
- *        whether we're doing i(0), j(1), k(2) or l(3)
- * Output: an array of counts per atom
- *         arrays of inversemaps
- *         nimaps - the number of invmaps actually used.
- * 
- * */
-int
-gpuCalcInvMap( 
-		int posflag,  //0-niatoms-1
-		int niatoms,  //3 for angles, 4 for torsions, impropers
-		int nints,    //number of interactions
-		int natoms,   //number of atoms
-		int *atoms, //gromacs interaction list
-		int nmaps,      //maximum number of inverse maps
-      int counts[],   //output counts of how many places each atom occurs
-		float4 *invmaps[], //output array of nmaps inverse maps
-		int *nimaps        //output max number of inverse maps actually used
-		);
-
-void
-gpuPrintInvMaps( int nmaps, int natoms, int counts[], float4 *invmap[],	FILE* logFile );
-
-/* We are still plagued by kernel call overheads. This is for a big fat
- * merged inverse gather kernel:
- * Since we have 32 bit floats, we have 23 bits of mantissa or the largest
- * integer we can represent is 2^23. So it should be quite safe to add 
- * 100000 * n to the index where n is the stream in which we should do the
- * lookup. This assumes that nints < 100000, preferably nints << 100000
- * which should always be true
- * */
-int
-gpuCalcInvMap_merged( 
-		int nints,    //number of interactions
-		int natoms,   //number of atoms
-		int *atoms,   //ijkl,ijkl,ijkl...
-		int nmaps,      //maximum number of inverse maps
-        int counts[],   //output counts of how many places each atom occurs
-		float4 *invmaps[], //output array of nmaps inverse maps
-		int *nimaps        //output max number of inverse maps actually used
-		);
-
-/* Repacks the invmap streams for more efficient access in the
- * merged inverse gather kernel
- *
- * buf should be nimaps * natoms large.
- * */
-int
-gpuRepackInvMap_merged( int natoms, int nmaps, int *counts, 
-		float4 *invmaps[], float4 *buf );
-
-#endif //__INVMAP_H__
--- a/platforms/brook/src/kernels/kcom.br
+++ b/platforms/brook/src/kernels/kcom.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-/**---------------------------------------------------------------------------------------
-
-   This kernel calculates the linear momentum
-
-   @param mass           stream of masses
-   @param velocities     stream of velocities
-   @param linearMomentum output stream of linear momentum
-
-   --------------------------------------------------------------------------------------- */
-
-kernel void kCalculateLinearMomentum( float mass<>, float3 velocities<>, out float3 linearMomentum<> ){
-   linearMomentum = mass*velocities;
-}
-
-/**---------------------------------------------------------------------------------------
-
-   This kernel scales the linear momentum
-
-   @param scale              scale factor
-   @param linearMomentumIn   input linearMomentum
-   @param linearMomentumOut  output linearMomentum
-
-   --------------------------------------------------------------------------------------- */
-
-kernel void kScale( float scale, float3 linearMomentumIn<>, out float3 linearMomentumOut<> ){
-   linearMomentumOut = scale*linearMomentumIn;
-}
-
-/**---------------------------------------------------------------------------------------
-
-   This kernel calculates the total linear momentum via a reduction
-
-   @param momentum       momentum
-   @param linearMomentum total momentum
-
-   Note: treating the output linearMomentum as non-stream did not compile, even though
-   I found examples in Brook testing code that had that construct -- MSF 3/6/08
-
-   --------------------------------------------------------------------------------------- */
-
-reduce void kReduceLinearMomentum( float3 momentum<>, reduce float3 linearMomentum<> ){
-   linearMomentum += momentum;
-}
-
-/**---------------------------------------------------------------------------------------
-
-   This kernel calculates the total linear momentum via a reduction
-
-   @param atomStrWidth   atom stream width
-   @param numberOfAtoms  number of atoms
-   @param momentum       momentum
-   @param linearMomentum total momentum
-
-   --------------------------------------------------------------------------------------- */
-
-kernel void kSumLinearMomentum( float atomStrWidth, float numberOfAtoms, float scale, float3 momentum[][],
-                                out float3 linearMomentum<> ){
-
-   float atomCount;
-   float2 atomIndex;
-
-   atomCount      = 0.0f;
-   linearMomentum = float3( 0.0f, 0.0f, 0.0f );
-   atomIndex.y    =  0.0f;
-   atomIndex.x    =  0.0f;
-
-   while( atomCount < numberOfAtoms ){
-
-      linearMomentum += momentum[atomIndex];
-
-      atomIndex.x    += 1.0f;
-      if( atomIndex.x == atomStrWidth ){
-         atomIndex.x  = 0.0f; 
-         atomIndex.y += 1.0f;
-      }
-
-      atomCount += 1.0f;
-   } 
-   linearMomentum *= scale;
-}
-
-/**---------------------------------------------------------------------------------------
-
-   This kernel calculates the total linear momentum via a reduction
-
-   @param momentum       momentum
-   @param linearMomentum total momentum
-
-   Note: treating the output linearMomentum as non-stream did not compile, even though
-   I found examples in Brook testing code that had that construct -- MSF 3/6/08
-
-   --------------------------------------------------------------------------------------- */
-
-reduce void kSum( float3 array<>, reduce float3 sum<> ){
-   //sum += array;
-   //sum += array.x + array.y + array.z;
-   //sum += float3( 1.0, 1.0, 1.0 );
-/*
-   sum.x += 1.0;
-   sum.y += 1.0;
-   sum.z += 1.0;
-*/
-   sum += array;
-   //sum += 1.0;
-}
-
-/**---------------------------------------------------------------------------------------
-
-   This kernel subtracts the (total linear momentum)/totalMass from the velocities 
-
-   @param linearMomentum  normalized linear momentum
-   @param velocities      stream of velocities
-   @param velocitiesOut   output stream of velocities
-
-   --------------------------------------------------------------------------------------- */
-
-kernel void kRemoveLinearMomentum( float3 linearMomentum[], float3 velocities<>, out float3 velocitiesOut<> ){
-   velocitiesOut = velocities - linearMomentum[0];
-}
--- a/platforms/brook/src/kernels/kcom.h
+++ b/platforms/brook/src/kernels/kcom.h
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-void  kCalculateLinearMomentum( ::brook::stream mass, ::brook::stream velocities, ::brook::stream linearMomentum );
-void  kReduceLinearMomentum( ::brook::stream momentum, ::brook::stream linearMomentum );
-//void  kSumLinearMomentum( ::brook::stream momentum, float3* linearMomentum );
-void  kScale( float scale, ::brook::stream linearMomentumIn, ::brook::stream linearMomentumOut );
-void  kRemoveLinearMomentum( ::brook::stream linearMomentum, ::brook::stream velocitiesIn, ::brook::stream velocitiesOut );
-void kSum( ::brook::stream array, ::brook::stream sum );
-
-/**---------------------------------------------------------------------------------------
-
-   This kernel calculates the total linear momentum via a reduction
-
-   @param atomStrWidth   atom stream width
-   @param numberOfAtoms  number of atoms
-   @param scale          sum of inverse masses
-   @param momentum       momentum
-   @param linearMomentum total momentum
-
-   --------------------------------------------------------------------------------------- */
-
-void kSumLinearMomentum( float atomStrWidth, float numberOfAtoms, float scale, ::brook::stream momentum, ::brook::stream linearMomentum );
-
-
--- a/platforms/brook/src/kernels/kcommon.br
+++ b/platforms/brook/src/kernels/kcommon.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-kernel void kgetxyz( float4 instr<>, out float3 outstr<> ){
-	outstr = instr.xyz;
-}
-
-//Zeroes out a stream
-kernel void kzerof3( out float3 outstr<> ){
-	outstr = float3( 0.0f, 0.0f, 0.0f );
-}
-
-//Zeros out a stream
-kernel void kzerof4( out float4 outstr<> ){
-	outstr = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-}
-
-kernel void ksetf4( float4 val, out float4 outstr<> ){
-	outstr = val;
-}
-
-kernel void ksetStr3( float3 instr<>, out float3 outstr<> ){
-	outstr  = instr;
-}
-
-kernel void kadd3( float3 in1<>, float3 in2<>, out float3 outstr<> ){ 
-   outstr  = in1 + in2;
-}
-
--- a/platforms/brook/src/kernels/kcommon.h
+++ b/platforms/brook/src/kernels/kcommon.h
-#ifndef __KCOMMON_H__
-#define __KCOMMON_H__
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-void  kgetxyz(::brook::stream instr, ::brook::stream outstr); 
-
-void  kzerof3(::brook::stream outstr);
-void  kzerof4(::brook::stream outstr); 
-void  kzerof4(::brook::stream outstr); 
-
-void  ksetf4(const float4  val, ::brook::stream outstr); 
-void kadd3( ::brook::stream instr1, ::brook::stream instr2, ::brook::stream outstr );
-void ksetStr3( ::brook::stream instr, ::brook::stream outstr );
-
-#endif // __KCOMMON_H__
--- a/platforms/brook/src/kernels/kforce.h
+++ b/platforms/brook/src/kernels/kforce.h
-#ifndef __KFORCE_H__
-#define __KFORCE_H__
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-//Define a generic force kernel prototype
-//To make switching easier to look at
-//This should match the kernels from kforce.br
-typedef void (*gpuNBForceFunction)(
-		const float natoms,
-		const float  dupfac,
-		const float  strheight,
-		const float  strwidth,
-		const float  jstrwidth,
-		const float  fstrwidth,
-		const float  epsfac,
-		const float4 params,
-		::brook::stream posq,
-		::brook::stream isigeps,
-		::brook::stream sigma,
-		::brook::stream epsilon,
-		::brook::stream excl,
-		::brook::stream outforce1,
-		::brook::stream outforce2 
-		);
-
-void  knbforce_CDLJ (const float  natoms,
-		const float  dupfac,
-		const float  strheight,
-		const float  strwidth,
-		const float  jstrwidth,
-		const float  fstrwidth,
-		const float  epsfac,
-		::brook::stream posq,
-		::brook::stream isigeps,
-		::brook::stream sigma,
-		::brook::stream epsilon,
-		::brook::stream excl,
-		::brook::stream outforce1,
-		::brook::stream outforce2); 
-
-void  knbforce_CDLJ4(
-      const float  natoms,
-      const float  nAtomsCeiling,
-		const float  dupfac,
-		const float  strheight,
-		const float  strwidth,
-		const float  jstrwidth,
-		const float  fstrwidth,
-		const float  epsfac,
-		::brook::stream posq,
-		::brook::stream charge,
-		::brook::stream isigeps,
-		::brook::stream sigma,
-		::brook::stream epsilon,
-		::brook::stream excl,
-		::brook::stream outforce1,
-		::brook::stream outforce2, 
-		::brook::stream outforce3, 
-		::brook::stream outforce4); 
-
-void  kmerge_partial_forces (const float  repfac,
-		const float  atomStrWidth,
-		const float  pforceStrWidth,
-		const float  natoms,
-		::brook::stream pforce1,
-		::brook::stream pforce2,
-		::brook::stream force); 
-
-void kMergeFloat3_4( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-		::brook::stream pforce1,
-		::brook::stream pforce2,
-		::brook::stream pforce3,
-		::brook::stream pforce4,
-		::brook::stream force );
-
-
-void kMergeFloat3_4_nobranch( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-		::brook::stream pforce1,
-		::brook::stream pforce2,
-		::brook::stream pforce3,
-		::brook::stream pforce4,
-		::brook::stream force );
-
-typedef void ( *gpuNBForceFunction14 )
-	(const float  natoms,
-		const float  strwidth,
-		const float  epsfac,
-		const float4 params,
-		::brook::stream atoms,
-		::brook::stream posq,
-		::brook::stream sigeps,
-		::brook::stream fi,
-		::brook::stream fj); 
-
-
-void  kforce14_CDLJ
-	(const float  natoms,
-		const float  strwidth,
-		const float  epsfac,
-		const float4 params,
-		::brook::stream atoms,
-		::brook::stream posq,
-		::brook::stream sigeps,
-		::brook::stream fi,
-		::brook::stream fj); 
-
-void  kforce14_LDLJ 
-	(const float  natoms,
-		const float  strwidth,
-		const float  epsfac,
-		const float4 params,
-		::brook::stream atoms,
-		::brook::stream posq,
-		::brook::stream sigeps,
-		::brook::stream fi,
-		::brook::stream fj); 
-
-void  kforce14_SFDLJ 
-	(const float  natoms,
-		const float  strwidth,
-		const float  epsfac,
-		const float4 params,
-		::brook::stream atoms,
-		::brook::stream posq,
-		::brook::stream sigeps,
-		::brook::stream fi,
-		::brook::stream fj); 
-
-void  kforce14_SHEFFIELD
-	(const float  natoms,
-		const float  strwidth,
-		const float  epsfac,
-		const float4 params,
-		::brook::stream atoms,
-		::brook::stream posq,
-		::brook::stream sigeps,
-		::brook::stream fi,
-		::brook::stream fj); 
-
-typedef void (*gpuBondedFunction)(
-		const float  epsfac,
-		const float  xstrwidth,
-		const float4 params,
-		::brook::stream posq,
-		::brook::stream atoms,
-		::brook::stream parm0,
-		::brook::stream parm1,
-		::brook::stream parm2,
-		::brook::stream parm3,
-		::brook::stream parm4,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl);
-
-void  kbonded_CDLJ (const float  epsfac,
-		const float  xstrwidth,
-		::brook::stream posq,
-		::brook::stream charge,
-		::brook::stream atoms,
-		::brook::stream parm0,
-		::brook::stream parm1,
-		::brook::stream parm2,
-		::brook::stream parm3,
-		::brook::stream parm4,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl); 
-
-void  kbonded_CDLJDebug (const float  epsfac,
-		const float  xstrwidth,
-		const float4 params,
-		::brook::stream posq,
-		::brook::stream atoms,
-		::brook::stream parm0,
-		::brook::stream parm1,
-		::brook::stream parm2,
-		::brook::stream parm3,
-		::brook::stream parm4,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl); 
-
-void  kbonded_LDLJ (const float  epsfac,
-		const float  xstrwidth,
-		const float4 params,
-		::brook::stream posq,
-		::brook::stream atoms,
-		::brook::stream parm0,
-		::brook::stream parm1,
-		::brook::stream parm2,
-		::brook::stream parm3,
-		::brook::stream parm4,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl); 
-
-void  kbonded_SFDLJ (const float  epsfac,
-		const float  xstrwidth,
-		const float4 params,
-		::brook::stream posq,
-		::brook::stream atoms,
-		::brook::stream parm0,
-		::brook::stream parm1,
-		::brook::stream parm2,
-		::brook::stream parm3,
-		::brook::stream parm4,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl); 
-
-void  kbonded_SHEFFIELD (const float  epsfac,
-		const float  xstrwidth,
-		const float4 params,
-		::brook::stream posq,
-		::brook::stream atoms,
-		::brook::stream parm0,
-		::brook::stream parm1,
-		::brook::stream parm2,
-		::brook::stream parm3,
-		::brook::stream parm4,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl); 
-
-void kAddForces3_4( 
-		const float  conversion,
-      ::brook::stream force1, 
-      ::brook::stream force2, 
-      ::brook::stream outForce );
- 
-#endif //__KFORCE_H__
--- a/platforms/brook/src/kernels/kforce_CDLJ.br
+++ b/platforms/brook/src/kernels/kforce_CDLJ.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-kernel float4 scalar_force_CDLJ( float4 qq, float epsfac, float4 sig, float4 eps, float4 r2 ) {
- float4 invr, invrsig2, invrsig6;
- float4 f;
-
- invr = rsqrt( r2 );
- invrsig2 = invr * sig;
- invrsig2 = invrsig2 * invrsig2;
- invrsig6 = invrsig2 * invrsig2 * invrsig2;
- f = eps * ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
- f += epsfac*qq*invr;
- f *= invr*invr;
- return f;
-}
-
-kernel float scalar_force_single_CDLJ( float qq, float epsfac, float sig, float eps, float r2 ) {
- float invr, invrsig2, invrsig6;
- float f;
-
- invr = rsqrt( r2 );
- invrsig2 = invr * sig;
- invrsig2 = invrsig2 * invrsig2;
- invrsig6 = invrsig2 * invrsig2 * invrsig2;
- f = eps * ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
- f += epsfac*qq*invr;
- f *= invr*invr;
- return f;
-}
-kernel float4 get_r2_CDLJ( float3 d1, float3 d2, float3 d3, float3 d4 ) {
- float4 r2;
- r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
- return r2;
-}
-
-kernel void knbforce_CDLJ (
-  float natoms,
-  float dupfac,
-  float strheight,
-  float strwidth,
-  float jstrwidth,
-  float fstrwidth,
-  float epsfac,
-  float4 params,
-  float4 posq[][],
-  float4 isigeps<>,
-  float4 sigma[][],
-  float4 epsilon[][],
-  float2 excl[][],
-
-
-  out float3 outforce1<>,
-  out float3 outforce2<> ) {
-
- float4 jsig, jeps;
- float3 ipos1, ipos2;
- float3 jpos1, jpos2, jpos3, jpos4;
- float qi1, qi2;
- float4 qj, qq;
- float3 d1, d2, d3, d4;
- float2 exclind;
- float4 sig, eps;
- float2 exclusions;
- float4 r2;
- float3 pad;
- float4 exclmask;
- float4 exclconst;
- float2 iatom;
- float2 jstrindex;
- float a_iatom;
- float linind;
-
- float breakflag;
- float2 jatom;
- float4 fs;
- float jend, jstart;
- float which_rep;
-
- exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
-
-
- outforce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
- outforce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-
-
-   a_iatom = 2.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
-
-
- linind = fmod( a_iatom, natoms );
- iatom.x = fmod( linind, strwidth );
- iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
-
-
-
- ipos1 = posq[ iatom ].xyz;
- qi1 = posq[ iatom ].w;
-
- iatom.x += 1;
- ipos2 = posq[ iatom ].xyz;
- qi2 = posq[ iatom ].w;
-
-
- breakflag = 1.0f;
- jatom.y = 0.0f;
-
-
-
-
-
-
-
- which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
- jend = 1 + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
- jstart = which_rep * jend;
- if ( which_rep > dupfac - 1 - 0.5f ) {
-
-  jend = 1e6f;
- }
- jend += jstart;
-
- exclind.x = linind/2.0f;
- exclind.y = jstart * strwidth/4.0f;
-
- jatom.y = jstart;
- while ( jatom.y < jend && breakflag > 0.0f ) {
-  jatom.x = 0;
-  while ( jatom.x < strwidth && breakflag > 0.0f ) {
-
-   exclusions = excl[ exclind ];
-
-   exclmask = fmod( exclusions.x, exclconst );
-
-
-   linind = (jatom.x + jatom.y*strwidth)/4.0f;
-   jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
-   jstrindex.x = linind - jstrindex.y * jstrwidth;
-
-
-   jsig = sigma[ jstrindex ];
-   jeps = epsilon[ jstrindex ];
-
-   jpos1 = posq[ jatom ].xyz;
-   qj.x = posq[ jatom ].w;
-   jatom.x += 1.0f;
-
-   jpos2 = posq[ jatom ].xyz;
-   qj.y = posq[ jatom ].w;
-   jatom.x += 1.0f;
-
-   jpos3 = posq[ jatom ].xyz;
-   qj.z = posq[ jatom ].w;
-   jatom.x += 1.0f;
-
-   jpos4 = posq[ jatom ].xyz;
-   qj.w = posq[ jatom ].w;
-   jatom.x += 1.0f;
-
-
-   sig = isigeps.x + jsig;
-   eps = isigeps.y * jeps;
-   qq = qi1 * qj;
-
-   d1 = ipos1 - jpos1;
-   d2 = ipos1 - jpos2;
-   d3 = ipos1 - jpos3;
-   d4 = ipos1 - jpos4;
-
-
-   r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
-   fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
-
-   outforce1 += fs.x * d1;
-   outforce1 += fs.y * d2;
-   outforce1 += fs.z * d3;
-   outforce1 += fs.w * d4;
-/*
-outforce1.x += exclmask.x;
-outforce1.x += exclmask.y;
-outforce1.x += exclmask.z;
-outforce1.x += exclmask.w;
-if( exclmask.x > 0.5f ){
-   outforce1.x += 1.0f;
-}
-if( exclmask.y > 0.5f ){
-   outforce1.x += 1.0f;
-}
-if( exclmask.z > 0.5f ){
-   outforce1.x += 1.0f;
-}
-if( exclmask.w > 0.5f ){
-   outforce1.x += 1.0f;
-}
-*/
-
-   exclmask = fmod( exclusions.y, exclconst );
-
-   sig = isigeps.z + jsig;
-   eps = isigeps.w * jeps;
-   qq = qi2 * qj;
-
-   d1 = ipos2 - jpos1;
-   d2 = ipos2 - jpos2;
-   d3 = ipos2 - jpos3;
-   d4 = ipos2 - jpos4;
-
-   r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
-   fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
-
-   outforce2 += fs.x * d1;
-   outforce2 += fs.y * d2;
-   outforce2 += fs.z * d3;
-   outforce2 += fs.w * d4;
-
-/*
-outforce2.x += exclmask.x;
-outforce2.x += exclmask.y;
-outforce2.x += exclmask.z;
-outforce2.x += exclmask.w;
-if( exclmask.x > 0.5f ){
-   outforce2.x += 1.0f;
-}
-if( exclmask.y > 0.5f ){
-   outforce2.x += 1.0f;
-}
-if( exclmask.z > 0.5f ){
-   outforce2.x += 1.0f;
-}
-if( exclmask.w > 0.5f ){
-   outforce2.x += 1.0f;
-}
-*/
-
-   exclind.y += 1.0f;
-
-
-   if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
-    breakflag = -1.0f;
-  }
-
-  jatom.y += 1.0f;
- }
-
-}
-
-kernel void knbforce_CDLJ4(
-  float natoms,
-  float nAtomsCeiling,
-  float dupfac,
-  float strheight,
-  float strwidth,
-  float jstrwidth,
-  float fstrwidth,
-  float epsfac,
-  float3 posq[][],
-  float  charge[][],
-  float4 isigeps[][],
-  float4 sigma[][],
-  float4 epsilon[][],
-  float excl[][],
-  out float3 outforce1<>,
-  out float3 outforce2<>,
-  out float3 outforce3<>,
-  out float3 outforce4<> ) {
-
- float4 jsig, jeps;
- float3 ipos1, ipos2, ipos3, ipos4;
- float4 jpos;
- float3 jpos1, jpos2, jpos3, jpos4;
- float qi1, qi2, qi3, qi4;
-
- float isig1, isig2, isig3, isig4;
- float ieps1, ieps2, ieps3, ieps4;
-
- float4 qj, qq;
- float3 d1, d2, d3, d4;
- float2 exclind;
- float4 sig, eps;
- float  exclusions;
- float4 r2;
- float3 pad;
- float4 exclmask;
- float4 exclconst;
- float2 iatom;
- float2 jstrindex;
- float a_iatom;
- float linind;
-
- float breakflag;
- float2 jatom;
- float4 fs;
- float jend, jstart;
- float which_rep;
- float maskMultiplier;
-
- maskMultiplier = 1.0e+06f;
-
- exclconst  = float4( 2.0f, 3.0f, 5.0f, 7.0f );
-
- outforce1  = float3( 0.0f, 0.0f, 0.0f );
- outforce2  = float3( 0.0f, 0.0f, 0.0f );
- outforce3  = float3( 0.0f, 0.0f, 0.0f );
- outforce4  = float3( 0.0f, 0.0f, 0.0f );
-
- a_iatom    = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
-
- // linind     = fmod( a_iatom, natoms );
- linind     = fmod( a_iatom, nAtomsCeiling );
-
- iatom.x    = fmod( linind, strwidth );
- iatom.y    = round( (linind - fmod(linind, strwidth))/strwidth );
-
-/* --------------------------------------------------------------------- */
-
- ipos1      = posq[ iatom ];
- qi1        = charge[ iatom ];
-
- jstrindex  = isigeps[ iatom ];
- isig1      = jstrindex.x;
- ieps1      = jstrindex.y;
-
-/* --------------------------------------------------------------------- */
-
- iatom.x   += 1;
- ipos2      = posq[ iatom ];
- qi2        = charge[ iatom ];
-
- jstrindex  = isigeps[ iatom ];
- isig2      = jstrindex.x;
- ieps2      = jstrindex.y;
-
-/* --------------------------------------------------------------------- */
-
- iatom.x   += 1;
- ipos3      = posq[ iatom ];
- qi3        = charge[ iatom ];
-
- jstrindex  = isigeps[ iatom ];
- isig3      = jstrindex.x;
- ieps3      = jstrindex.y;
-
-/* --------------------------------------------------------------------- */
-
- iatom.x   += 1;
- ipos4      = posq[ iatom ];
- qi4        = charge[ iatom ];
-
-
- jstrindex  = isigeps[ iatom ];
- isig4      = jstrindex.x;
- ieps4      = jstrindex.y;
-
-/* --------------------------------------------------------------------- */
-
- breakflag  = 1.0f;
- jatom.y    = 0.0f;
-
- // which_rep  = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
- which_rep  = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
-
- jend       = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
- jstart     = which_rep * jend;
- if ( which_rep > dupfac - 1.5f ) {
-  jend = 1e6f;
- }
- jend      += jstart;
-
- exclind.x  = linind;
- exclind.y  = jstart * strwidth/4.0f;
- exclind.y  = floor(  exclind.y  + 0.25f );
-
- jatom.y    = jstart;
- while ( jatom.y < jend && breakflag > 0.0f ) {
-  jatom.x = 0;
-  while ( jatom.x < strwidth && breakflag > 0.0f ) {
-
-   exclusions  = excl[ exclind ];
-   exclmask    = fmod( exclusions, exclconst );
-
-   linind      = (jatom.x + jatom.y*strwidth)/4.0f;
-   linind      = floor( linind + 0.25f );
-
-   jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
-   jstrindex.x = linind - jstrindex.y * jstrwidth;
-   jstrindex.x = floor( jstrindex.x + 0.25f );
-
-   /* --------------------------------------------------------------------- */
-
-   jsig        = sigma[ jstrindex ];
-   jeps        = epsilon[ jstrindex ];
-
-   jpos1       = posq[ jatom ];
-   qj.x        = charge[ jatom ];
-   jatom.x     = floor( jatom.x + 1.2f );
-
-   jpos2       = posq[ jatom ];
-   qj.y        = charge[ jatom ];
-   jatom.x     = floor( jatom.x + 1.2f );
-
-   jpos3       = posq[ jatom ];
-   qj.z        = charge[ jatom ];
-   jatom.x     = floor( jatom.x + 1.2f );
-
-   jpos4       = posq[ jatom ];
-   qj.w        = charge[ jatom ];
-   jatom.x     = floor( jatom.x + 1.2f );
-
-   /* --------------------------------------------------------------------- */
-
-   sig         = isig1 + jsig;
-   eps         = ieps1 * jeps;
-   qq          = qi1*qj;
-
-   d1          = ipos1 - jpos1;
-   d2          = ipos1 - jpos2;
-   d3          = ipos1 - jpos3;
-   d4          = ipos1 - jpos4;
-
-   r2          = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
-   fs          = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
-
-   outforce1  += fs.x * d1;
-   outforce1  += fs.y * d2;
-   outforce1  += fs.z * d3;
-   outforce1  += fs.w * d4;
-
-   /* --------------------------------------------------------------------- */
-
-   //exclind.x  += 1.0f;
-   exclind.x   = floor( exclind.x + 1.2f );
-   exclusions  = excl[ exclind ];
-   exclmask    = fmod( exclusions, exclconst );
-
-   sig         = isig2 + jsig;
-   eps         = ieps2 * jeps;
-   qq          = qi2 * qj;
-
-   d1          = ipos2 - jpos1;
-   d2          = ipos2 - jpos2;
-   d3          = ipos2 - jpos3;
-   d4          = ipos2 - jpos4;
-
-   r2          = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
-   fs          = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
-
-   outforce2  += fs.x * d1;
-   outforce2  += fs.y * d2;
-   outforce2  += fs.z * d3;
-   outforce2  += fs.w * d4;
-
-   /* --------------------------------------------------------------------- */
-
-   //exclind.x  += 1.0f;
-   exclind.x   = floor( exclind.x + 1.2f );
-   exclusions  = excl[ exclind ];
-   exclmask    = fmod( exclusions, exclconst );
-
-   sig         = isig3 + jsig;
-   eps         = ieps3 * jeps;
-   qq          = qi3   * qj;
-
-   d1          = ipos3 - jpos1;
-   d2          = ipos3 - jpos2;
-   d3          = ipos3 - jpos3;
-   d4          = ipos3 - jpos4;
-
-   r2          = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
-   fs          = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
-
-   outforce3  += fs.x * d1;
-   outforce3  += fs.y * d2;
-   outforce3  += fs.z * d3;
-   outforce3  += fs.w * d4;
-
-   /* --------------------------------------------------------------------- */
-
-   //exclind.x  += 1.0f;
-   exclind.x   = floor( exclind.x + 1.2f );
-   exclusions  = excl[ exclind ];
-   exclmask    = fmod( exclusions, exclconst );
-
-   sig         = isig4 + jsig;
-   eps         = ieps4 * jeps;
-   qq          = qi4   * qj;
-
-   d1          = ipos4 - jpos1;
-   d2          = ipos4 - jpos2;
-   d3          = ipos4 - jpos3;
-   d4          = ipos4 - jpos4;
-
-   r2          = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
-   fs          = scalar_force_CDLJ( qq, epsfac, sig, eps, r2 );
-
-   outforce4  += fs.x * d1;
-   outforce4  += fs.y * d2;
-   outforce4  += fs.z * d3;
-   outforce4  += fs.w * d4;
-
-   /* --------------------------------------------------------------------- */
-
-   //exclind.x  -= 3.0f;
-   exclind.x   = floor( exclind.x - 2.9f );
-
-   //exclind.y  += 1.0f;
-   exclind.y  = floor(  exclind.y  + 1.25f );
-
-   if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
-  }
-
-  //jatom.y += 1.0f;
-  jatom.y  = floor( jatom.y + 1.2f );
- }
-
-}
-
-kernel void kforce14_CDLJ(
-  float natoms,
-  float strwidth,
-  float epsfac,
-  float4 params,
-  float2 atoms<>,
-  float4 posq[][],
-  float2 sigeps<>,
-  out float3 fi<>,
-  out float3 fj<>
-  )
-{
- float2 ai, aj;
- float3 d1;
- float r2;
- float qq, fs;
-
- //ai.y = floor( atoms.x / strwidth );
- ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
- ai.x = atoms.x - ai.y * strwidth;
-
- //aj.y = floor( atoms.y / strwidth );
- aj.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
- aj.x = atoms.y - aj.y * strwidth;
-
- d1 = posq[ai].xyz - posq[aj].xyz;
- qq = posq[ai].w * posq[aj].w;
-
- r2 = dot( d1, d1 );
-
- fs = scalar_force_single_CDLJ( qq, epsfac, sigeps.x, sigeps.y, r2 );
-
- fi = fs.x * d1;
- fj = -fi;
-}
-
-kernel void kAcosV( float x<>, out float acosVal<> ){
-
-   float x2, x3, x5, x7, x9, x11, x13, x15, x17, x19;
-
-   float pi2           = 1.57079632679489661923f;
-   float coefficient3  = 1.0f/6.0f;
-   float coefficient5  = 3.0f/40.0f;
-   float coefficient7  = 5.0f/112.0f;
-   float coefficient9  = 35.0f/1152.0f;
-   float coefficient11 = 63.0f/2816.0f;
-   float coefficient13 = 231.0f/13312.0f;
-   float coefficient15 = 143.0f/10240.0f;
-   float coefficient17 = 6435.0f/557056.0f;
-   float coefficient19 = 12155.0f/1245184.0f;
- 
-   // ---------------------------------------------------------------------------------------
-
-   x2             =  x*x;
-   x3             =  x*x2;
-   x5             = x3*x2;
-   x7             = x5*x2;
-   x9             = x7*x2;
-   x11            = x9*x2;
-   x13            = x11*x2;
-   x15            = x13*x2;
-   x17            = x15*x2;
-   x19            = x17*x2;
-
-   acosVal        = x19*coefficient19;
-   acosVal       += x17*coefficient17;
-   acosVal       += x15*coefficient15;
-   acosVal       += x13*coefficient13;
-   acosVal       += x11*coefficient11;
-   acosVal       += x9*coefficient9;
-   acosVal       += x7*coefficient7;
-   acosVal       += x5*coefficient5;
-   acosVal       += x3*coefficient3 + x;
-   acosVal        = pi2 - acosVal;
-
-}
-
-kernel void kAcosTanV( float xIn<>, out float acosVal<> ){
-
-   const float c1             = 48.70107004404898384f;
-   const float c2             = 49.5326263772254345f;
-   const float c3             = 9.40604244231624f;
-   const float c4             = 48.70107004404996166f;
-   const float c5             = 65.7663163908956299f;
-   const float c6             = 21.587934067020262f;
-   const float tantwelfthpi   = 0.26794919243112270647255365849413f;
-   const float tansixthpi     = 0.57735026918962576450914878050196f;
-   const float sixthpi        = 0.52359877559829887307710723054658f;
-   const float halfpi         = 1.5707963267948966192313216916398f;
-
-   float x2, y, x;
-   float sign, complement, region;
-
-   // ---------------------------------------------------------------------------------------
-
-   complement                 = 0.0f;
-   region                     = 0.0f;
-
-   x                          = xIn;
-   y                          = rsqrt( (1.0f + x)*(1.0f  - x) );
-   x                         *= y;
-   if( x < 0.0f ){
-      x     = -x;
-      sign  = -1.0f;
-   } else {
-      sign  =  1.0f;
-   }
-
-   if( x > 1.0f ){
-      x           = 1.0f/x;
-      complement  = 1.0f;
-   } else {
-      complement  = 0.0f;
-   }
-
-   if( x > tantwelfthpi ){
-      x        = (x-tansixthpi)/(1.0f + tansixthpi*x);
-      region   = sixthpi;
-   } else {
-      region   = 0.0f;
-   }
-
-   x2          = x*x;
-   y           = (x*(c1 + x2*(c2 + x2*c3))/(c4 + x2*(c5 + x2*(c6 + x2))));
-
-   y          += region;
-
-   y           = (complement > 0.0f) ? (halfpi- y) : y;
-   acosVal     = halfpi - sign*y;
-
-}
-
-kernel void kAcos( float xIn, out float acosVal<> ){
-
-   const float c1             = 48.70107004404898384f;
-   const float c2             = 49.5326263772254345f;
-   const float c3             = 9.40604244231624f;
-   const float c4             = 48.70107004404996166f;
-   const float c5             = 65.7663163908956299f;
-   const float c6             = 21.587934067020262f;
-   const float tantwelfthpi   = 0.26794919243112270647255365849413f;
-   const float tansixthpi     = 0.57735026918962576450914878050196f;
-   const float sixthpi        = 0.52359877559829887307710723054658f;
-   const float halfpi         = 1.5707963267948966192313216916398f;
-
-   float x2, y, x;
-   float sign, complement, region;
-
-   // ---------------------------------------------------------------------------------------
-
-   // acos(x) = atan(x/sqrt(1-x**2))
-
-   x        = xIn;
-   y        = rsqrt( (1.0f + x)*(1.0f - x) );
-   x        *= y;
-
-   if( x < 0.0f ){
-      x     = -x;
-      sign  = -1.0f;
-   } else {
-      sign  =  1.0f;
-   }
-
-   if( x > 1.0f ){
-      x           = 1.0f/x;
-      complement  = 1.0f;
-   } else {
-      complement  = 0.0f;
-   }
-
-   if( x > tantwelfthpi ){
-      x        = (x - tansixthpi)/(1.0f + tansixthpi*x);
-      region   = sixthpi;
-   } else {
-      region   = 0.0f;
-   }
-
-   x2          = x*x;
-   y           = (x*(c1 + x2*(c2 + x2*c3))/(c4 + x2*(c5 + x2*(c6 + x2))));
-
-   y          += region;
-
-   y           = (complement > 0.0f) ? (halfpi- y) : y;
-   acosVal     = halfpi - sign*y;
-
-}
-
-kernel void kbonded_CDLJ (
-      float epsfac,
-      float xstrwidth,
-      float3 posq[][],
-      float  charge<>,
-      float4 atoms<>,
-      float4 parm0<>,
-      float4 parm1<>,
-      float4 parm2<>,
-      float4 parm3<>,
-      float4 parm4<>,
-      out float3 fi<>,
-      out float3 fj<>,
-      out float3 fk<>,
-      out float3 fl<>
-   ){
-  
-   float3 rij, rkj, rkl, ril;
-   float2 ai, aj, ak, al;
-   float3 m, n;
-   float sgnphi;
-   float cosfac;
-   float ddphi, ddphi_rb;
-   float3 u, v, s;
-   float invlkj, invlij, invlkl, msq, nsq, cos_phi, sin_phi;
-   float3 uij, ukj, ukl;
-   float phi, mdphi;
-   float rij_d_ukj, ukj_d_rkl;
-   float normfac;
-   float qq;
-   float3 fi_ang, fj_ang, fk_ang;
-   float3 fi_bond;
-   float cii, ckk, cik;
-   float fs, st, sth;
-   float3 fi_pair;
-   float r2 ;
-   float torsion_numerator;
-   float theta1, costheta1, invsintheta1;
-   float theta2, costheta2, invsintheta2;
-   float3 posqi, posqj, posqk, posql;
-   float sig, eps;
-  
-   // i-coordinates
-
-   ai.y  = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
-   ai.x  = atoms.x - ai.y * xstrwidth;
-   posqi = posq[ ai ];
-  
-   // j-coordinates
-
-   if ( atoms.y > -0.5f ) {
-      aj.y  = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
-      aj.x  = atoms.y - aj.y * xstrwidth;
-      posqj = posq[ aj ];
-   } else {
-      posqj = float3( 0.0f, 1.0f, 0.0f );
-   }
-  
-   // k-coordinates
-
-   if ( atoms.z > -0.5f ) {
-      ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
-      ak.x = atoms.z - ak.y * xstrwidth;
-      posqk = posq[ ak ];
-   } else { 
-      posqk = float3( 1.0f, 1.0f, 0.0f );
-   }
-  
-   // l-coordinates
-
-   if ( atoms.w > -0.5f ) {
-      al.y  = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
-      al.x  = atoms.w - al.y * xstrwidth;
-      posql = posq[ al ];
-   } else {
-      posql = float3( 1.0f, 1.0f, 1.0f );
-   }
-  
-   rij             = posqi - posqj;
-   rkj             = posqk - posqj;
-   rkl             = posqk - posql;
-   ril             = posqi - posql;
-  
-   invlij          = rsqrt( dot( rij, rij ) );
-   invlkj          = rsqrt( dot( rkj, rkj ) );
-   invlkl          = rsqrt( dot( rkl, rkl ) );
-  
-   uij             = rij * invlij;
-   ukj             = rkj * invlkj;
-   ukl             = rkl * invlkl;
-  
-   rij_d_ukj       = dot( rij, ukj );
-   ukj_d_rkl       = dot( ukj, rkl );
-  
-   m               = cross( uij, ukj );
-   n               = cross( ukj, ukl );
-  
-   costheta1       = clamp( rij_d_ukj * invlij, -1.0f, 1.0f );
-   //theta1          = acos( costheta1 );
-   kAcos( costheta1, theta1 );
-   invsintheta1    = rsqrt( (1.0f - costheta1) * (1.0f + costheta1) );
-  
-   costheta2       = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
-   //theta2          = acos( costheta2 );
-   kAcos( costheta2, theta2 );
-   invsintheta2    = rsqrt( (1.0f - costheta2) * (1.0f + costheta2) );
-  
-   cos_phi         = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
-   sgnphi          = sign( dot(rij, n) );
-  
-   phi             = sgnphi * acos( cos_phi );
-   mdphi           = parm1.w * phi - parm1.z;
-   ddphi           = -parm1.y * parm1.w * sin( mdphi );
-  
-   cos_phi         = -cos_phi;
-   sin_phi         = -sgnphi*sqrt( (1.0f - cos_phi) * (1.0f + cos_phi) );
-  
-   ddphi_rb        = 5.0f * parm1.x;
-   ddphi_rb        = 4.0f * parm0.w + ddphi_rb * cos_phi;
-   ddphi_rb        = 3.0f * parm0.z + ddphi_rb * cos_phi;
-   ddphi_rb        = 2.0f * parm0.y + ddphi_rb * cos_phi;
-   ddphi_rb        = parm0.x + ddphi_rb * cos_phi;
-   ddphi_rb        = -ddphi_rb * sin_phi;
-  
-   ddphi          += ddphi_rb;
-/*
-fi = float3( cos_phi, sgnphi, ddphi_rb );
-fj = float3( sin_phi, parm1.x, parm0.w );
-fk = float3( parm0.z, parm0.y, parm0.x );
-fl = float3( ddphi, 0.0f, 0.0f );
-*/
-  
-   fi              = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
-   fl              = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
-  
-   u               = rij_d_ukj * invlkj * fi;
-   v               = ukj_d_rkl * invlkj * fl;
-   s               = u - v;
-  
-   fj              = s - fi;
-   fk              = -(s + fl);
-  
-  
-   fs              = -parm2.y * ( theta1 - parm2.x );
-   st              = fs * invsintheta1;
-   st              = clamp( st, -100000.0f, 100000.0f );
-  
-   sth             = st * costheta1;
-  
-   cik             = st * invlkj * invlij;
-   cii             = sth * invlij;
-   ckk             = sth * invlkj;
-  
-   fi_ang          = -( cik * rkj - cii * uij );
-   fk_ang          = -( cik * rij - ckk * ukj );
-   fj_ang          = -fi_ang - fk_ang;
-  
-   fi             += fi_ang;
-   fj             += fj_ang;
-   fk             += fk_ang;
-   fs              = -parm2.w * ( theta2 - parm2.z );
-   st              = fs * invsintheta2;
-   st              = clamp( st, -100000.0f, 100000.0f );
-  
-   sth             = st * costheta2;
-  
-   cik             = st * invlkj * invlkl;
-   cii             = sth * invlkj;
-   ckk             = sth * invlkl;
-  
-   fi_ang          = ( cik * rkl - cii * ukj );
-   fk_ang          = ( cik * rkj - ckk * ukl );
-   fj_ang          = -fi_ang - fk_ang;
-  
-   fj             += fi_ang;
-   fk             += fj_ang;
-   fl             += fk_ang;
-  
-  
-   fi_bond         = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
-   fi             += fi_bond;
-   fj             += -fi_bond;
-  
-   fi_bond         = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
-   fj             += fi_bond;
-   fk             += -fi_bond;
-  
-   fi_bond         = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
-   fk             += fi_bond;
-   fl             += -fi_bond;
-  
-   if( parm4.z > -0.5f ){
-      r2        = dot( ril, ril );
-      fs        = scalar_force_single_CDLJ( charge, epsfac, parm4.z, parm4.w, r2 );
-      fi_pair   = fs * ril;
-      fi       += fi_pair;
-      fl       -= fi_pair;
-//fi = float3( charge, parm4.z, parm4.w );
-//fj = float3( 1.0f, 2.f, 3.0f );
-/*
-   } else {
-      fi = float3( 0.0f, 0.f, 0.0f );
-      fj = float3( 0.0f, 0.f, 0.0f );
-      fk = float3( 0.0f, 0.f, 0.0f );
-      fl = float3( 0.0f, 0.f, 0.0f );
-*/
-   }
-
-}
-
--- a/platforms/brook/src/kernels/kgbsa.br
+++ b/platforms/brook/src/kernels/kgbsa.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-//124 FLOPS [??? total]
-kernel void bornSumInternal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
-                             float iAtomicRadii, out float4 bornSum<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   float4 r, rInverse, r2, r2Inverse;
-   float4 l_ij, l_ij2;
-   float4 u_ij, u_ij2;
-   float4 t3;
-   float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
-   float4 iAtomicRadii4;
-   float4 diff_u2_l2;
-
-   const float bigValue      = 1000000.0f;
-   const float4 bigValue4    = float4( bigValue, bigValue, bigValue, bigValue );
-
-   const float smallValue    = 0.000001f;
-   const float4 smallValue4  = float4(  smallValue, smallValue, smallValue, smallValue );
-
-   const float4 zero4        = float4(  0.0f,  0.0f,  0.0f,  0.0f  );
-   const float4 one4         = float4(  1.0f,  1.0f,  1.0f,  1.0f  );
-
-   float4 mask;
-
-   // ---------------------------------------------------------------------------------------
-
-   iAtomicRadii4      = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
-
-   r2                 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) ); //20
-
-   // ---------------------------------------------------------------------------------------
-
-   // not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
-   // r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
-
-   mask               = r2 < smallValue4 ? zero4     : one4;
- 
-   // ---------------------------------------------------------------------------------------
-
-   r                  = sqrt( r2 ); //4
-   rInverse           = rsqrt( r2 ); //8
-
-   rPlusScaledJ       = r + jAtomicRadiiScaled; //4
-   rMinusScaledJ      = r - jAtomicRadiiScaled; //4
-   rMinusScaledJAbs   = abs( rMinusScaledJ ); 
-
-//40
-   // ---------------------------------------------------------------------------------------
-
-   mask               = iAtomicRadii4 >= rPlusScaledJ     ? zero4         : mask;
-   l_ij               = iAtomicRadii4 >  rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
-
-   // ---------------------------------------------------------------------------------------
-
-   l_ij               = 1.0f/l_ij; //4
-   l_ij2              = l_ij*l_ij; //4
-
-   u_ij               = 1.0f/rPlusScaledJ; //4
-   u_ij2              = u_ij*u_ij; //4
-   diff_u2_l2         = u_ij2 - l_ij2; //4
-
-   logRatio           = log(u_ij/l_ij); //4
-//24
-
-   // ---------------------------------------------------------------------------------------
-
-   // terms associated w/ dL/dr & dU/dr are zero and not included
-
-   // Born radius term
-
-   bornSum            =   l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2; //10*4
-
-   t3                 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4; //12 for true, but don't know how to average so saying 0
-   bornSum           += t3; //4
-   bornSum           *= mask; //4
-
-//60
-}
-
-//???? FLOPS 
-kernel void kCalculateBornRadii( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor, 
-                                 float streamWidth, float fstreamWidth, float3 posq[][], 
-                                 float2 scaledAtomicRadii[][], 
-                                 out float4 bornForce<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   // atomic radii: radius - dielectric offset 
-
-   float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
-
-   // scaled atomic radii: scaleFactor*( radius - dielectric offset )
-
-   float  j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
-   float4 jScaledAtomicRadii;
-
-   // (Born radii**2)*BornForce*ObcChainForce
-
-   // i,j coordinates
-
-   float3 i1pos, i2pos, i3pos, i4pos; 
-   float3 j1pos, j2pos, j3pos, j4pos;
-
-   // delta coordinates
-
-   float3 d1, d2, d3, d4;
-
-   // indices
-
-   float2 iAtom; 
-
-   float forceIndex;
-
-   //This is forceIndex mod numberOfAtoms, the true i index
-
-   float4 de, bornSum;
-
-   float iAtomLinearIndex, jLinind; 
-   
-   float2 jAtom;
-   float jEnd, jStart, jBlock;
-   float whichRep;
-   float tmp;
-   float tmp2;
-
-
-   // ---------------------------------------------------------------------------------------
-
-   // constants
-
-   const float I_Unroll          = 4.0f;
-
-   // ---------------------------------------------------------------------------------------
-
-   // set linear index
-
-   iAtom                         = indexof( bornForce );
-   forceIndex                    = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
-
-   iAtomLinearIndex              = fmod( forceIndex, roundedUpAtoms );
-
-   // ---------------------------------------------------------------------------------------
-
-   // set gather coordinates
-
-   iAtom.x                       = fmod(  iAtomLinearIndex, streamWidth );
-   iAtom.y                       = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
-   
-   // ---------------------------------------------------------------------------------------
-
-   // fetch position, (atomic radius - dielectric offset )
-
-   i1pos                         = posq[                    iAtom ];
-   i1AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-
-   iAtom.x                      += 1;
-   i2pos                         = posq[                    iAtom ];
-   i2AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-
-   iAtom.x                      += 1;
-   i3pos                         = posq[                    iAtom ];
-   i3AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-
-   iAtom.x                      += 1;
-   i4pos                         = posq[                    iAtom ];
-   i4AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-
-   bornForce                     = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-
-   // inner loop indices setup
-   //changed the following instruction for rounding issues on some ASICs
-   //whichRep                      = floor( forceIndex / roundedUpAtoms );
-
-   tmp                           = fmod(forceIndex, roundedUpAtoms);
-   whichRep                      = round((forceIndex - tmp)/roundedUpAtoms);
-   
-   jBlock                        = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) );
-   
-   jStart                        = whichRep*jBlock;
-
-   jEnd                          = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
-
-   jAtom.y                       = jStart;
-   jLinind                       = jAtom.y*streamWidth;
-
-   // ---------------------------------------------------------------------------------------
-
-   while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind )  > 0.9f ){
-      jAtom.x = 0.0f;
-      while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){ 
-
-      // ---------------------------------------------------------------------------------------
-
-      // gather required values
-          
-         j1ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j1pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-
-         j2ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j2pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-
-         j3ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j3pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-
-         j4ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j4pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-         
-         jScaledAtomicRadii   = float4( j1ScaledAtomicRadii,  j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
-   
-         // ---------------------------------------------------------------------------------------
-   
-         // First set of 4 -- i == 1
-   
-         d1                 = j1pos - i1pos; //3
-         d2                 = j2pos - i1pos; //3
-         d3                 = j3pos - i1pos; //3
-         d4                 = j4pos - i1pos; //3
-   
-         // i = 1
-
-         bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i1AtomicRadii, bornSum ); //112 : 368(?)
-   
-         bornForce.x       += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
-   
-         // ---------------------------------------------------------------------------------------
-
-         // i = 2
-
-         d1                 = j1pos - i2pos; //3
-         d2                 = j2pos - i2pos; //3
-         d3                 = j3pos - i2pos; //3
-         d4                 = j4pos - i2pos; //3
-   
-         bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i2AtomicRadii, bornSum );//112 : 368(?)
-  
-         bornForce.y       += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
-   
-         // ---------------------------------------------------------------------------------------
-
-         // i = 3
-
-         d1                 = j1pos - i3pos; //3
-         d2                 = j2pos - i3pos; //3
-         d3                 = j3pos - i3pos; //3
-         d4                 = j4pos - i3pos; //3
-
-         bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i3AtomicRadii, bornSum );//112 : 368(?)
-   
-         bornForce.z       += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
-   
-         // ---------------------------------------------------------------------------------------
-
-         // i = 4
-
-         d1                 = j1pos - i4pos; //3
-         d2                 = j2pos - i4pos; //3
-         d3                 = j3pos - i4pos; //3
-         d4                 = j4pos - i4pos; //3
-
-         bornSumInternal( d1, d2, d3, d4, jScaledAtomicRadii, i4AtomicRadii, bornSum );//112 : 368(?)
-   
-         bornForce.w       += bornSum.x + bornSum.y + bornSum.z + bornSum.w; //4
-
-         // ---------------------------------------------------------------------------------------
-   
-         // increment linear index by 4   
-   
-         jLinind           += 4.0f;
-   
-      }
-      jAtom.y       += 1.0f;
-   }
-}
-
-kernel void kPostCalculateBornRadii_nobranch( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-      float conversion,
-      float mergeNonObcForces,
-      float4 pstream1[][], 
-      float  atomicRadii<>,
-      out float bornRadii<>,
-      out float obcChain<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   float atomIndex, forceIndex, qIndex, qOff;
-   float2 pindex;
-   float i;
-   float sum2, sum3, bornSum, tanhSum, atomicRadiiOffset, obcIntermediate;
-   float4 o1;
-   float tmp;
-
-   float2 iAtom; 
-   float4 forces;
-   float expPlus, expMinus;
-
-   // ---------------------------------------------------------------------------------------
-
-   // constants -- OBC Type II
-
-   const float alphaObc          = 1.0f;
-   const float betaObc           = 0.8f;
-   const float gammaObc          = 4.85f;
-   const float dielectricOffset  = 0.009f;
-
-   // ---------------------------------------------------------------------------------------
-   
-   // given atom index find force indices and streams
-
-   pindex      = indexof( obcChain );
-   atomIndex   = pindex.x + pindex.y*atomStreamWidth;
-   forceIndex  = atomIndex;
-
-   // add current forces in inStream to forces stored in pstreams
-   // the .w entry is Born sum values; it will be used to calculate the
-   // Born radii and obcChain term
-
-   bornSum  = 0.0f;
-   
-   // sum over j-loop 'duplications' by gathering from pstreams
-
-   for( i = 0.0f; i < repfac; i += 1.0f ){
-
-      qIndex            = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
-      qOff              = forceIndex - iUnroll*qIndex;
-      pindex.y          = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
-      pindex.x          = qIndex - pindex.y*pStreamWidth;
-
-      o1                = pstream1[ pindex ];
-      
-      tmp               = qOff < 0.5f ? o1.x : o1.y;
-      tmp               = qOff < 1.5f ? tmp : o1.z;
-      tmp               = qOff < 2.5f ? tmp : o1.w;
-      
-      bornSum          += tmp;
-        
-      forceIndex       += roundNatoms;
-      
-   }
-
-   // compute Born radii and ObcChain
-
-   atomicRadiiOffset            = atomicRadii - dielectricOffset;
-
-   bornSum                     *= 0.5f*atomicRadiiOffset;
-
-   sum2                         = bornSum*bornSum;
-   sum3                         = bornSum*sum2;
-
-   // Tanh does not exist? 
-   // calculate [ exp(x) - exp(-x) ]/[ exp(x) + exp(-x) ]
-
-   // tanhSum                      = tanh( bornSum - betaObc*sum2 + gammaObc*sum3 );
-
-   tanhSum                      = bornSum - betaObc*sum2 + gammaObc*sum3;
-   expPlus                      = exp( tanhSum );
-   expMinus                     = 1.0f/expPlus;
-   tanhSum                      = ( expPlus - expMinus )/( expPlus + expMinus );
-    
-   bornRadii                    = 1.0f/( (1.0f/(atomicRadiiOffset)) - tanhSum/atomicRadii );  
- 
-   obcIntermediate              = atomicRadiiOffset*( alphaObc - 2.0f*betaObc*bornSum + 3.0f*gammaObc*sum2 );
-   obcChain                     = (1.0f - tanhSum*tanhSum)*obcIntermediate/atomicRadii;
-   if( atomIndex >= natoms ){
-      bornRadii = 0.0f;
-      obcChain  = 0.0f;
-   }
-
-}
-
-kernel void loop1Internal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jBornR,
-                           float4 jQ, float iBornR, float iQ, out float4 dGpol_dr<>,
-                           out float4 dGpol_dalpha2_ij<> ){
- 
-   // ---------------------------------------------------------------------------------------
-
-   float4 r2, alpha2_ij, D_ij, expTerm, denominator2, denominator, Gpol;
-
-   // ---------------------------------------------------------------------------------------
-
-   r2                 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
-   alpha2_ij          = jBornR*iBornR;
-   D_ij               = r2/(4.0f*alpha2_ij);
-   expTerm            = exp( -D_ij );
-   denominator2       = r2 + alpha2_ij*expTerm; 
-   denominator        = sqrt( denominator2 ); 
-   Gpol               = jQ/denominator; 
-   Gpol              *= iQ;
-   dGpol_dr           = -Gpol*( 1.0f - 0.25f*expTerm )/denominator2;  
-   dGpol_dalpha2_ij   = -0.5f*Gpol*expTerm*( 1.0f + D_ij )*jBornR/denominator2;
-
-}
-
-/* ---------------------------------------------------------------------------------------
-
-   Calculate nonpolar ACE term (Simbios) 
-
-   bornRadius:          Born radius
-   vdwRadius:           Vdw radius
-   duplicationFactor:   duplication factor
-   aceForce:            ACE term
-
-   --------------------------------------------------------------------------------------- */
-
-kernel void kAceNonPolarLoop1( float iBornRadius, float iVdwRadius, float duplicationFactor,
-                                out float aceForce<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   // nonpolar term
-
-   float iSurface;
-   float iAceTerm;
-
-   // ---------------------------------------------------------------------------------------
-
-   // constants
-
-   // solvent radius
-
-   const float probeRadius       = 0.14f;
-
-   // PI*4*6*0.0054*1000 (0.0054=asolv from Tinker)
-   //const float PI_24_aI          = -0.3694512961;
-   const float PI_24_aI          = -407.1504079f;
-
-   // ---------------------------------------------------------------------------------------
-
-   // etch i position and partial charge
-
-   // e = ai * term * (ri+probe)**2 * (ri/rb)**6
-   // (drbi) = drb(i) - 6.0fd0*e/rb
-
-   // (rI+probe)**2
-
-   iSurface                     = (iVdwRadius+probeRadius);
-   iSurface                     = iSurface*iSurface;
-
-   // (rI/rB)**6
-
-   iAceTerm                     = iVdwRadius/iBornRadius;
-   iAceTerm                     = iAceTerm*iAceTerm*iAceTerm;
-   iAceTerm                     = iAceTerm*iAceTerm;
-   aceForce                     = iSurface*iAceTerm*PI_24_aI/(duplicationFactor*iBornRadius);
-
-}
-
-/* ---------------------------------------------------------------------------------------
-
-   Calculate first loop force terms  (Simbios) 
-
-   numberOfAtoms:       no. of atoms
-   roundedUpAtoms:      rounded up number of atoms -- accounts for unrolling
-   duplicationFactor:   number of threads for inner loop
-   streamWidth:         atom stream width
-   fstreamWidth:        force stream width (output -- i-unroll)
-   soluteDielectric:    solute dielectric
-   solventDielectric:   solvent dielectric
-   includeAce:          include ACE term 
-   posq:                atom positions and charge
-   bornRadii:           Born radii
-   nonpolarForce:       nonpolar force (0 if nonpolar not included, else
-                        ACE value)
-   bornForce1:          i-unroll first force component, including dBornR/dr in .w
-   bornForce2:          i-unroll second force component, including dBornR/dr in .w 
-   bornForce3:          i-unroll first force component, including dBornR/dr in .w
-   bornForce4:          i-unroll second force component, including dBornR/dr in .w 
-
-   --------------------------------------------------------------------------------------- */
-
-kernel void kObcLoop1( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor, 
-                       float streamWidth, float fstreamWidth, float soluteDielectric,
-                       float solventDielectric, float includeAce,
-                       float3 posq[][], float  bornRadii[][], float2  atomicRadii[][], 
-                       out float4 bornForce1<>, out float4 bornForce2<>,
-                       out float4 bornForce3<>, out float4 bornForce4<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   // Born radii
-
-   float i1BornR, i2BornR, i3BornR, i4BornR;
-   float j1BornR, j2BornR, j3BornR, j4BornR;
-   float4 jBornR;
-
-   // atomic radii
-
-   float i1AtomicR, i2AtomicR, i3AtomicR, i4AtomicR;
-
-   // i,j coordinates
-
-   float3 i1Pos, i2Pos, i3Pos, i4Pos; 
-   float3 j1Pos, j2Pos, j3Pos, j4Pos;
-   float4 j1PosQ, j2PosQ, j3PosQ, j4PosQ;
-
-   // i, j partial charges
-
-   float i1Q, i2Q, i3Q, i4Q;
-   float j1Q, j2Q, j3Q, j4Q;
-   float4 jQ;
-
-   float aceForce;
-  
-   // delta coordinates
-
-   float3 d1, d2, d3, d4;
-
-   // intermediate terms
-
-   float4 dGpol_dr, dGpol_dalpha2_ij;
-
-   // indices
-
-   float2 iAtom; 
-   float forceIndex;
-
-   // This is forceIndex mod numberOfAtoms, the true i index
-
-   float iAtomLinearIndex, jLinind; 
-
-   float2 jAtom;
-   float jEnd, jStart, jBlock;
-   float whichRep;
-   float tmp; 
-
-   // ---------------------------------------------------------------------------------------
-
-   // electricConstant           = -166.0f2691;
-   // preFactor                  = 2.0f*electricConstant*(1.0f - (1.0f/waterDielectric))
-   float preFactor               = -332.05382f;
-   const float I_Unroll          = 4.0f;
-   const float3 zero3            = float3( 0.0f, 0.0f, 0.0f );
-
-   // ---------------------------------------------------------------------------------------
-
-   preFactor                    *= ( (1.0f/soluteDielectric) - (1.0f/solventDielectric) );
-
-   iAtom                         = indexof( bornForce1 );
-   forceIndex                    = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
-
-   iAtomLinearIndex              = fmod( forceIndex, roundedUpAtoms );
-
-   // ---------------------------------------------------------------------------------------
-
-   // set gather index
-
-   iAtom.x                       = fmod(  iAtomLinearIndex, streamWidth );
-   iAtom.y                       = round( (iAtomLinearIndex - fmod(iAtomLinearIndex, streamWidth ))/streamWidth );
-
-   // ---------------------------------------------------------------------------------------
-
-   // etch i1 position and partial charge
-
-   jQ                            = posq[          iAtom ];
-   i1Pos                         = jQ.xyz;
-   i1Q                           = atomicRadii[   iAtom ].y;
-   i1Q                          *= preFactor;
-   i1BornR                       = bornRadii[     iAtom ];
-   i1AtomicR                     = atomicRadii[   iAtom ].x;
-   kAceNonPolarLoop1( i1BornR, i1AtomicR, duplicationFactor, aceForce );
-   bornForce1.xyz                = zero3;
-   bornForce1.w                  = includeAce > 0.5f ? aceForce : 0.0f;
-
-   // ---------------------------------------------------------------------------------------
-
-   // etch i2 position and partial charge
-
-   iAtom.x                      += 1;
-   jQ                            = posq[          iAtom ];
-   i2Pos                         = jQ.xyz;
-   i2Q                           = atomicRadii[   iAtom ].y;
-   i2Q                          *= preFactor;
-   i2BornR                       = bornRadii[     iAtom ];
-   i2AtomicR                     = atomicRadii[   iAtom ].x;
-   kAceNonPolarLoop1( i2BornR, i2AtomicR, duplicationFactor, aceForce );
-   bornForce2.xyz                = zero3;
-   bornForce2.w                  = includeAce > 0.5f ? aceForce : 0.0f;
-
-   // ---------------------------------------------------------------------------------------
-
-   // etch i3 position and partial charge
-
-   iAtom.x                      += 1;
-   jQ                            = posq[          iAtom ];
-   i3Pos                         = jQ.xyz;
-   i3Q                           = atomicRadii[   iAtom ].y;
-   i3Q                          *= preFactor;
-   i3BornR                       = bornRadii[     iAtom ];
-   i3AtomicR                     = atomicRadii[   iAtom ].x;
-   kAceNonPolarLoop1( i3BornR, i3AtomicR, duplicationFactor, aceForce );
-   bornForce3.xyz                = zero3;
-   bornForce3.w                  = includeAce > 0.5f ? aceForce : 0.0f;
-
-   // ---------------------------------------------------------------------------------------
-
-   // etch i4 position and partial charge
-
-   iAtom.x                      += 1;
-
-   jQ                            = posq[          iAtom ];
-   i4Pos                         = jQ.xyz;
-   i4Q                           = atomicRadii[   iAtom ].y;
-   i4Q                          *= preFactor;
-   i4BornR                       = bornRadii[     iAtom ];
-   i4AtomicR                     = atomicRadii[   iAtom ].x;
-   kAceNonPolarLoop1( i4BornR, i4AtomicR, duplicationFactor, aceForce );
-   bornForce4.xyz                = zero3;
-   bornForce4.w                  = includeAce > 0.5f ? aceForce : 0.0f;
-
-   // ---------------------------------------------------------------------------------------
-
-   // inner loop setup
-
-   // if dupFac == 4, I_UnRoll =2, then breaking inner loop into two segments
-   // to increase number of threads in flight
-
-   // forceStreamSz = N*RepFac/I_UnRoll
-   // forceIndex       = I_UnRoll*( a.x + a.y*forceStreamSz )
-   // whichRep      = 0 or 1
-   // jBlock        = 1 + floor[ N/(duplicationFactor*streamWidth) ]
-
-   //changed the following instruction for rounding issues on some ASICs
-   //whichRep                     = floor( forceIndex / roundedUpAtoms );
-   
-   tmp = fmod(forceIndex, roundedUpAtoms);
-   whichRep = round((forceIndex - tmp)/roundedUpAtoms);
-   
-   jBlock                       = 1 + floor( numberOfAtoms/(duplicationFactor*streamWidth ) );
-   jStart                       = whichRep*jBlock;
-
-   jEnd                         = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
-
-   jAtom.y                      = jStart;
-   jLinind                      = jAtom.y*streamWidth;
-
-   // ---------------------------------------------------------------------------------------
-
-   while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind )  > 0.9f ){
-      jAtom.x = 0.0f;
-      while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ) {
-
-         // ---------------------------------------------------------------------------------------
-
-         // gather required values
-
-         j1Pos              = posq[      jAtom ];
-         j1Q                = atomicRadii[   jAtom ].y;
-         j1BornR            = bornRadii[ jAtom ];
-         jAtom.x           += 1.0f;
-
-         j2Pos              = posq[      jAtom ];
-         j2Q                = atomicRadii[   jAtom ].y;
-         j2BornR            = bornRadii[ jAtom ];
-         jAtom.x           += 1.0f;
-
-         j3Pos              = posq[      jAtom ];
-         j3Q                = atomicRadii[   jAtom ].y;
-         j3BornR            = bornRadii[ jAtom ];
-         jAtom.x           += 1.0f;
-
-         j4Pos              = posq[      jAtom ];
-         j4Q                = atomicRadii[   jAtom ].y;
-         j4BornR            = bornRadii[ jAtom ];
-         jAtom.x           += 1.0f;
-
-         jBornR             = float4( j1BornR, j2BornR, j3BornR, j4BornR );
-         jQ                 = float4( j1Q, j2Q, j3Q, j4Q );
-
-         // ---------------------------------------------------------------------------------------
-
-         // i == 1
-
-         d1                 = i1Pos - j1Pos;
-         d2                 = i1Pos - j2Pos;
-         d3                 = i1Pos - j3Pos;
-         d4                 = i1Pos - j4Pos;
-
-         loop1Internal( d1, d2, d3, d4, jBornR, jQ, i1BornR, i1Q, dGpol_dr, dGpol_dalpha2_ij );
- 
-         bornForce1.xyz    += dGpol_dr.x*d1; 
-         bornForce1.xyz    += dGpol_dr.y*d2; 
-         bornForce1.xyz    += dGpol_dr.z*d3; 
-         bornForce1.xyz    += dGpol_dr.w*d4; 
-
-         bornForce1.w      += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
-
-         // ---------------------------------------------------------------------------------------
-
-         // i == 2
-
-         d1                 = i2Pos - j1Pos;
-         d2                 = i2Pos - j2Pos;
-         d3                 = i2Pos - j3Pos;
-         d4                 = i2Pos - j4Pos;
-
-         loop1Internal( d1, d2, d3, d4, jBornR, jQ, i2BornR, i2Q, dGpol_dr, dGpol_dalpha2_ij );
-
-         bornForce2.xyz    += dGpol_dr.x*d1; 
-         bornForce2.xyz    += dGpol_dr.y*d2; 
-         bornForce2.xyz    += dGpol_dr.z*d3; 
-         bornForce2.xyz    += dGpol_dr.w*d4; 
-
-         bornForce2.w      += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
-
-         // ---------------------------------------------------------------------------------------
-
-         // i == 3
-
-         d1                 = i3Pos - j1Pos;
-         d2                 = i3Pos - j2Pos;
-         d3                 = i3Pos - j3Pos;
-         d4                 = i3Pos - j4Pos;
-
-         loop1Internal( d1, d2, d3, d4, jBornR, jQ, i3BornR, i3Q, dGpol_dr, dGpol_dalpha2_ij );
-
-         bornForce3.xyz    += dGpol_dr.x*d1; 
-         bornForce3.xyz    += dGpol_dr.y*d2; 
-         bornForce3.xyz    += dGpol_dr.z*d3; 
-         bornForce3.xyz    += dGpol_dr.w*d4; 
-
-         bornForce3.w      += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
-
-         // ---------------------------------------------------------------------------------------
-
-         // i == 4
-
-         d1                 = i4Pos - j1Pos;
-         d2                 = i4Pos - j2Pos;
-         d3                 = i4Pos - j3Pos;
-         d4                 = i4Pos - j4Pos;
-
-         loop1Internal( d1, d2, d3, d4, jBornR, jQ, i4BornR, i4Q, dGpol_dr, dGpol_dalpha2_ij );
-
-         bornForce4.xyz    += dGpol_dr.x*d1; 
-         bornForce4.xyz    += dGpol_dr.y*d2; 
-         bornForce4.xyz    += dGpol_dr.z*d3; 
-         bornForce4.xyz    += dGpol_dr.w*d4; 
-
-         bornForce4.w      += dGpol_dalpha2_ij.x + dGpol_dalpha2_ij.y + dGpol_dalpha2_ij.z + dGpol_dalpha2_ij.w;
-
-         // ---------------------------------------------------------------------------------------
-
-         jLinind    += 4.0f;
-      }
-      jAtom.y       += 1.0f;      
-   }
-
-}
-
-// The inner loop by definition creates divergent paths.  Chances are
-// fair that we will take all sides of the branch anyway, so this
-// verion uses manual predication
-
-kernel void kPostObcLoop1_nobranch( 
-    float repfac, 
-    float atomStreamWidth, 
-    float pStreamWidth,
-    float natoms,
-    float roundNatoms,
-    float iUnroll,
-    float4 pstream1[][], 
-    float4 pstream2[][], 
-    float4 pstream3[][], 
-    float4 pstream4[][], 
-    float obcChain<>, 
-    float bornRadii<>, 
-    out float4 outstream<>,
-    out float bornRadii2Force<> )
-{
-    
-    // ---------------------------------------------------------------------------------------
-    
-    float atomIndex, forceIndex, qIndex, qOff;
-    float2 pindex;
-    float i;
-    float4 o1,o2,o3,o4;
-    float4 tmp;
-    
-    // given atom index find force indices and streams
-    
-    pindex      = indexof( outstream );
-    atomIndex   = pindex.x + pindex.y*atomStreamWidth;
-    forceIndex  = atomIndex;
-    
-    outstream   = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-    
-    for( i = 0.0f; i < repfac; i += 1.0f ){
-        
-        qIndex         = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
-        qOff           = forceIndex - iUnroll*qIndex;
-        
-        pindex.y       = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
-        pindex.x       = qIndex - pindex.y*pStreamWidth;
-
-        o1             = pstream1[ pindex ];
-        o2             = pstream2[ pindex ];
-        o3             = pstream3[ pindex ];
-        o4             = pstream4[ pindex ];
-
-        tmp            = qOff < 0.5f ? o1  : o2;
-        tmp            = qOff < 1.5f ? tmp : o3;
-        tmp            = qOff < 2.5f ? tmp : o4;
-        
-        outstream     += tmp;
-        
-        forceIndex    += roundNatoms;
-    }
-    
-    bornRadii2Force  = obcChain*bornRadii*bornRadii*outstream.w;
-}
-
-
-//156 FLOPS [172 total]
-kernel void loop2Internal( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
-                           float bornForce, float iAtomicRadii, out float4 de<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   float4 r, rInverse, r2, r2Inverse;
-   float4 l_ij, l_ij2;
-   float4 u_ij, u_ij2;
-   float4 t3;
-   float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
-   float4 iAtomicRadii4;
-   float4 diff_u2_l2;
-
-   const float bigValue      = 1000000.0f;
-   const float4 bigValue4    = float4( bigValue, bigValue, bigValue, bigValue );
-
-   const float smallValue    = 0.000001f;
-   const float4 smallValue4  = float4(  smallValue, smallValue, smallValue, smallValue );
-
-   const float4 zero4        = float4(  0.0f,  0.0f,  0.0f,  0.0f  );
-   const float4 one4         = float4(  1.0f,  1.0f,  1.0f,  1.0f  );
-
-   float4 mask;
-
-   // ---------------------------------------------------------------------------------------
-
-   iAtomicRadii4      = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
-
-   r2                 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) ); //20
-
-   // ---------------------------------------------------------------------------------------
-
-   // not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
-   // r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
-
-   mask               = r2 < smallValue4 ? zero4     : one4;
-   // r2                 = r2 < smallValue4 ? bigValue4 : r2;
- 
-   // ---------------------------------------------------------------------------------------
-
-   r                  = sqrt( r2 ); //4
-   rInverse           = rsqrt( r2 ); //8
-   r2Inverse          = rInverse*rInverse; //4
-
-   rPlusScaledJ       = r + jAtomicRadiiScaled; //4
-   rMinusScaledJ      = r - jAtomicRadiiScaled; //4
-   rMinusScaledJAbs   = abs( rMinusScaledJ ); 
-
-   // ---------------------------------------------------------------------------------------
-
-   mask               = iAtomicRadii4 >= rPlusScaledJ     ? zero4         : mask;
-   l_ij               = iAtomicRadii4 >  rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
-
-   // ---------------------------------------------------------------------------------------
-
-   l_ij               = 1.0f/l_ij; //4
-   l_ij2              = l_ij*l_ij; //4
-
-   u_ij               = 1.0f/rPlusScaledJ; //4
-   u_ij2              = u_ij*u_ij; //4
-   diff_u2_l2         = u_ij2 - l_ij2; //4
-
-   scaledRadiusJ2     = jAtomicRadiiScaled*jAtomicRadiiScaled; //4
-   logRatio           = log(u_ij/l_ij); //4
-
-   // terms associated w/ dL/dr & dU/dr are zero and not included
-
-   t3                 = -0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*diff_u2_l2 + 0.25f*logRatio*r2Inverse; //7*4
-
-   de                 = mask*bornForce*t3*rInverse; //12
-
-// de = float4( 1.0f, 1.0f, 1.0f, 1.0f );
-// de = scaledRadiusJ2;
-// de = mask*t3;
-// de = mask*rPlusScaledJ;
-// de = mask*r2;
-// de = mask;
-// de = mask*rInverse;
-// de = mask*bornForce;
-
-   // Born radius term
-
-   // bornSum            =   l_ij - u_ij + 0.25*r*diff_u2_l2 + 0.5f*rInverse*logRatio - (0.25*jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
-/*
-   bornSum            =   l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2; //10*4
-
-   t3                 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4; //12 for true, but don't know how to average so saying 0
-   bornSum           += t3; //4
-   bornSum           *= mask; //4
-*/
-
-}
-
-kernel void loop2InternalR2( float4 r2, float4 jAtomicRadiiScaled,
-                             float bornForce, float iAtomicRadii, out float4 de<>, out float4 bornSum<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   float4 r, rInverse, r2Inverse;
-   float4 l_ij, l_ij2;
-   float4 u_ij, u_ij2;
-   float4 t3;
-   float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
-   float4 iAtomicRadii4;
-   float4 diff_u2_l2;
-
-   const float bigValue      = 1000000.0f;
-   const float4 bigValue4    = float4( bigValue, bigValue, bigValue, bigValue );
-
-   const float smallValue    = 0.000001f;
-   const float4 smallValue4  = float4(  smallValue, smallValue, smallValue, smallValue );
-
-   const float4 zero4        = float4(  0.0f,  0.0f,  0.0f,  0.0f  );
-   const float4 one4         = float4(  1.0f,  1.0f,  1.0f,  1.0f  );
-
-   float4 mask;
-
-   // ---------------------------------------------------------------------------------------
-
-   iAtomicRadii4      = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
-
-   // ---------------------------------------------------------------------------------------
-
-   // not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
-   // r2 not masked -- assume for i = j, that 0 * infinity i 0; appears to work
-
-   mask               = r2 < smallValue4 ? zero4     : one4;
-   // r2                 = r2 < smallValue4 ? bigValue4 : r2;
- 
-   // ---------------------------------------------------------------------------------------
-
-   r                  = sqrt( r2 );
-   rInverse           = rsqrt( r2 );
-   r2Inverse          = rInverse*rInverse;
-
-   rPlusScaledJ       = r + jAtomicRadiiScaled;
-   rMinusScaledJ      = r - jAtomicRadiiScaled;
-   rMinusScaledJAbs   = abs( rMinusScaledJ );
-
-   // ---------------------------------------------------------------------------------------
-
-   mask               = iAtomicRadii4 >= rPlusScaledJ     ? zero4         : mask;
-   l_ij               = iAtomicRadii4 >  rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
-
-   // ---------------------------------------------------------------------------------------
-
-   l_ij               = 1.0f/l_ij;
-   l_ij2              = l_ij*l_ij;
-
-   u_ij               = 1.0f/rPlusScaledJ;
-   u_ij2              = u_ij*u_ij;
-   diff_u2_l2         = u_ij2 - l_ij2;
-
-   scaledRadiusJ2     = jAtomicRadiiScaled*jAtomicRadiiScaled;
-   logRatio           = log(u_ij/l_ij);
-
-   // terms associated w/ dL/dr & dU/dr are zero and not included
-
-   t3                 = -0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*diff_u2_l2 + 0.25f*logRatio*r2Inverse;
-
-   de                 = mask*bornForce*t3*rInverse;
-
-// de = float4( 1.0f, 1.0f, 1.0f, 1.0f );
-// de = scaledRadiusJ2;
-// de = mask*t3;
-// de = mask*rPlusScaledJ;
-// de = mask*r2;
-// de = mask;
-// de = mask*rInverse;
-// de = mask*bornForce;
-
-   // Born radius term
-
-   // bornSum            =   l_ij - u_ij + 0.25*r*diff_u2_l2 + 0.5f*rInverse*logRatio - (0.25*jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
-   bornSum            =   l_ij - u_ij + 0.5f*rInverse*logRatio + 0.25f*( r - jAtomicRadiiScaled*jAtomicRadiiScaled*rInverse)*diff_u2_l2;
-
-   t3                 = ( iAtomicRadii4 < (jAtomicRadiiScaled - r) ) ? 2.0f*( (1.0f/iAtomicRadii) - l_ij) : zero4;
-   bornSum           += t3;
-   bornSum           *= mask;
-
-}
-
-/* This is a copy of loop2Internal(), but w/ the calculation of bornSum removed */
-
-//112 FLOPS
-kernel void loop2InternalNoSum( float3 d1, float3 d2, float3 d3, float3 d4, float4 jAtomicRadiiScaled,
-                                float bornForce, float iAtomicRadii, out float4 de<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   float4 r, rInverse, r2, r2Inverse;
-   float4 l_ij, l_ij2;
-   float4 u_ij, u_ij2;
-   float4 t3;
-   float4 logRatio, scaledRadiusJ2, rPlusScaledJ, rMinusScaledJ, rMinusScaledJAbs;
-   float4 iAtomicRadii4;
-
-   const float bigValue      = 1000000.0f;
-   const float4 bigValue4    = float4( bigValue, bigValue, bigValue, bigValue );
-
-   const float smallValue   = 0.000001f;
-   const float4 smallValue4 = float4(  smallValue, smallValue, smallValue, smallValue );
-   
-   const float4 zero4        = float4(  0.0f,  0.0f,  0.0f,  0.0f  );
-   const float4 one4         = float4(  1.0f,  1.0f,  1.0f,  1.0f  );
-   
-   float4 mask;
-
-   // ---------------------------------------------------------------------------------------
-
-   iAtomicRadii4      = float4( iAtomicRadii, iAtomicRadii, iAtomicRadii, iAtomicRadii );
-
-   r2                 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) ); //20
-
-   // ---------------------------------------------------------------------------------------
-
-   // not needed for force calculation since deltaX = deltaY = deltaZ = 0, if r = 0
-
-   // mask               = r2 < smallValue4 ? zero4     : one4;
-   // r2                 = r2 < smallValue4 ? bigValue4 : r2;
- 
-   // ---------------------------------------------------------------------------------------
-
-   r                  = sqrt( r2 ); //4
-   rInverse           = rsqrt( r2 ); //8
-   r2Inverse          = rInverse*rInverse; //4
-
-   rPlusScaledJ       = r + jAtomicRadiiScaled; //4
-   rMinusScaledJ      = r - jAtomicRadiiScaled; //4
-   rMinusScaledJAbs   = abs( rMinusScaledJ );
-
-   // ---------------------------------------------------------------------------------------
-
-   // mask               = iAtomicRadii4 >= rPlusScaledJ     ? zero4         : mask;
-   mask               = iAtomicRadii4 >= rPlusScaledJ     ? zero4         : one4;
-
-   l_ij               = iAtomicRadii4 >  rMinusScaledJAbs ? iAtomicRadii4 : rMinusScaledJAbs;
-
-   // ---------------------------------------------------------------------------------------
-
-   l_ij               = 1.0f/l_ij; //4
-   l_ij2              = l_ij*l_ij; //4
-
-   u_ij               = 1.0f/rPlusScaledJ; //4
-   u_ij2              = u_ij*u_ij; //4
-
-   scaledRadiusJ2     = jAtomicRadiiScaled*jAtomicRadiiScaled; //4
-   logRatio           = log(u_ij/l_ij); //4
-
-   // terms associated w/ dL/dr & dU/dr are zero and not included
-
-   t3                 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse; //8*4
-
-   de                 = mask*bornForce*t3*rInverse; //12
-
-}
-
-/* ---------------------------------------------------------------------------------------
-
-   Calculate Loop 2 OBC forces (Simbios) 
-
-   numberOfAtoms:       no. of atoms
-   roundedUpAtoms:      rounded up number of atoms based on unroll
-   duplicationFactor:              ?
-   strheight:           atom stream height
-   streamWidth:         atom stream width
-   fstreamWidth:        force stream width (output -- i-unroll)
-   posq:                atom positions and charge
-   atomicRadii:         atomic radii
-   scaledAtomicRadii:   scaled atomic radii
-   bornForceFactor:     (Born radii)**2*bornForce*Obcforce
-   bornForce1:          i-unroll first force component, including dBorn_r/dr in .w
-   bornForce2:          i-unroll second force component, including dBorn_r/dr in .w 
-   bornForce3:          i-unroll third force component, including dBorn_r/dr in .w 
-   bornForce4:          i-unroll fourth force component, including dBorn_r/dr in .w 
-#endif
-
-   --------------------------------------------------------------------------------------- */
-
-//1376 FLOPS 
-kernel void kObcLoop2( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor, 
-                       float streamWidth, float fstreamWidth, float3 posq[][], 
-                       float2 scaledAtomicRadii[][], float bornForceFactor[][], 
-                       out float4 bornForce1<>, out float4 bornForce2<>,
-                       out float4 bornForce3<>, out float4 bornForce4<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   // atomic radii: radius - dielectric offset 
-
-   float i1AtomicRadii, i2AtomicRadii, i3AtomicRadii, i4AtomicRadii;
-   float j1AtomicRadii, j2AtomicRadii, j3AtomicRadii, j4AtomicRadii;
-   float4 iAtomicRadii, jAtomicRadii;
-
-   // scaled atomic radii: scaleFactor*( radius - dielectric offset )
-
-   float  i1ScaledAtomicRadii, i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii;
-   float  j1ScaledAtomicRadii, j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii;
-   float4 iScaledAtomicRadii, jScaledAtomicRadii;
-
-   // (Born radii**2)*BornForce*ObcChainForce
-
-   float i1BornForceFactor, i2BornForceFactor, i3BornForceFactor, i4BornForceFactor;
-   float j1BornForceFactor, j2BornForceFactor, j3BornForceFactor, j4BornForceFactor;
-   //float4 iBornForceFactor;
-   //float4 jBornForceFactor;
-
-   // i,j coordinates
-
-   float3 i1pos, i2pos, i3pos, i4pos; 
-   float3 j1pos, j2pos, j3pos, j4pos;
-
-   // delta coordinates
-
-   float3 d1, d2, d3, d4;
-
-   // indices
-
-   float2 iAtom; 
-
-   float forceIndex;
-
-   //This is forceIndex mod numberOfAtoms, the true i index
-
-   float4 de, bornSum;
-
-   float iAtomLinearIndex, jLinind; 
-   
-   float2 jAtom;
-   float jEnd, jStart, jBlock;
-   float whichRep;
-   float tmp;
-   float tmp2;
-
-
-   // ---------------------------------------------------------------------------------------
-
-   // constants
-
-   const float I_Unroll          = 4.0f;
-
-   // ---------------------------------------------------------------------------------------
-
-   // set linear index
-
-   iAtom                         = indexof( bornForce1 );
-   forceIndex                    = I_Unroll*( iAtom.x + iAtom.y*fstreamWidth );
-
-   iAtomLinearIndex              = fmod( forceIndex, roundedUpAtoms );
-
-   // ---------------------------------------------------------------------------------------
-
-   // set gather coordinates
-
-   iAtom.x                       = fmod(  iAtomLinearIndex, streamWidth );
-   iAtom.y                       = round( (iAtomLinearIndex - iAtom.x)/streamWidth );
-   
-   // ---------------------------------------------------------------------------------------
-
-   // etch i1 position, Born prefactor, atomic radius
-
-   i1pos                         = posq[                    iAtom ];
-   i1BornForceFactor             = bornForceFactor[         iAtom ];
-   i1AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-   i1ScaledAtomicRadii           = scaledAtomicRadii[       iAtom ].x;
-   bornForce1                    = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-
-   // etch i2 position, Born prefactor, atomic radius
-
-   iAtom.x                      += 1;
-   i2pos                         = posq[                    iAtom ];
-   i2BornForceFactor             = bornForceFactor[         iAtom ];
-   i2AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-   i2ScaledAtomicRadii           = scaledAtomicRadii[       iAtom ].x;
-   bornForce2                    = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-
-   // etch i3 position, Born prefactor, atomic radius
-
-   iAtom.x                      += 1;
-   i3pos                         = posq[                    iAtom ];
-   i3BornForceFactor             = bornForceFactor[         iAtom ];
-   i3AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-   i3ScaledAtomicRadii           = scaledAtomicRadii[       iAtom ].x;
-   bornForce3                    = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-
-   // etch i4 position, Born prefactor, atomic radius
-
-   iAtom.x                      += 1;
-   i4pos                         = posq[                    iAtom ];
-   i4BornForceFactor             = bornForceFactor[         iAtom ];
-   i4AtomicRadii                 = scaledAtomicRadii[       iAtom ].y;
-   i4ScaledAtomicRadii           = scaledAtomicRadii[       iAtom ].x;
-   bornForce4                    = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-
-   // create float4 for main vars
-
-   iAtomicRadii                  = float4( i1AtomicRadii,        i2AtomicRadii,       i3AtomicRadii,       i4AtomicRadii );
-   iScaledAtomicRadii            = float4( i1ScaledAtomicRadii,  i2ScaledAtomicRadii, i3ScaledAtomicRadii, i4ScaledAtomicRadii );
-
-   // inner loop indices setup
-   //changed the following instruction for rounding issues on some ASICs
-   //whichRep                      = floor( forceIndex / roundedUpAtoms );
-
-   tmp                           = fmod(forceIndex, roundedUpAtoms);
-   whichRep                      = round((forceIndex - tmp)/roundedUpAtoms);
-   
-   jBlock                        = 1 + round( (numberOfAtoms-fmod( numberOfAtoms, (duplicationFactor*streamWidth) )) / (duplicationFactor*streamWidth) );
-   
-   jStart                        = whichRep*jBlock;
-
-   jEnd                          = ( whichRep > duplicationFactor - 1.5f ) ? 999999.0f : (jStart + jBlock);
-
-   jAtom.y                       = jStart;
-   jLinind                       = jAtom.y*streamWidth;
-
-   // ---------------------------------------------------------------------------------------
-
-   while ( jAtom.y < jEnd && ( numberOfAtoms - jLinind )  > 0.9f ){
-      jAtom.x = 0.0f;
-      while ( jAtom.x < streamWidth && ( numberOfAtoms - jLinind ) > 0.9f ){ 
-
-      // ---------------------------------------------------------------------------------------
-
-      // gather required values
-      
-          
-         j1BornForceFactor    = bornForceFactor[         jAtom ];
-         j1AtomicRadii        = scaledAtomicRadii[       jAtom ].y;
-         j1ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j1pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-
-         j2BornForceFactor    = bornForceFactor[         jAtom ];
-         j2AtomicRadii        = scaledAtomicRadii[       jAtom ].y;
-         j2ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j2pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-
-         j3BornForceFactor    = bornForceFactor[         jAtom ];
-         j3AtomicRadii        = scaledAtomicRadii[       jAtom ].y;
-         j3ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j3pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-
-         j4BornForceFactor    = bornForceFactor[         jAtom ];
-         j4AtomicRadii        = scaledAtomicRadii[       jAtom ].y;
-         j4ScaledAtomicRadii  = scaledAtomicRadii[       jAtom ].x;
-         j4pos                = posq[                    jAtom ];
-         jAtom.x             += 1.0f;
-         
-   
-         jAtomicRadii         = float4( j1AtomicRadii,        j2AtomicRadii,       j3AtomicRadii,       j4AtomicRadii       );
-         jScaledAtomicRadii   = float4( j1ScaledAtomicRadii,  j2ScaledAtomicRadii, j3ScaledAtomicRadii, j4ScaledAtomicRadii );
-   
-         // ---------------------------------------------------------------------------------------
-   
-         // First set of 4 -- i == 1
-   
-         d1                 = j1pos - i1pos; //3
-         d2                 = j2pos - i1pos; //3
-         d3                 = j3pos - i1pos; //3
-         d4                 = j4pos - i1pos; //3
-   
-         // i = 1
-
-         loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i1BornForceFactor, i1AtomicRadii, de ); //156 : 368
-   
-         bornForce1.xyz    += de.x*d1; //3*2=6
-         bornForce1.xyz    += de.y*d2; //3*2=6
-         bornForce1.xyz    += de.z*d3; //3*2=6
-         bornForce1.xyz    += de.w*d4; //3*2=6
-   
-         // ---------------------------------------------------------------------------------------
-
-         // i = 2
-
-         d1                 = j1pos - i2pos; //3
-         d2                 = j2pos - i2pos; //3
-         d3                 = j3pos - i2pos; //3
-         d4                 = j4pos - i2pos; //3
-   
-         loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i2BornForceFactor, i2AtomicRadii, de );//156 : 368
-   
-         bornForce2.xyz    += de.x*d1; //3*2=6
-         bornForce2.xyz    += de.y*d2; //3*2=6
-         bornForce2.xyz    += de.z*d3; //3*2=6
-         bornForce2.xyz    += de.w*d4; //3*2=6
-   
-         // ---------------------------------------------------------------------------------------
-
-         // i = 3
-
-         d1                 = j1pos - i3pos; //3
-         d2                 = j2pos - i3pos; //3
-         d3                 = j3pos - i3pos; //3
-         d4                 = j4pos - i3pos; //3
-
-         loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i3BornForceFactor, i3AtomicRadii, de );//156 : 368
-   
-         bornForce3.xyz    += de.x*d1; //3*2=6
-         bornForce3.xyz    += de.y*d2; //3*2=6
-         bornForce3.xyz    += de.z*d3; //3*2=6
-         bornForce3.xyz    += de.w*d4; //3*2=6
-   
-         // ---------------------------------------------------------------------------------------
-
-         // i = 4
-
-         d1                 = j1pos - i4pos; //3
-         d2                 = j2pos - i4pos; //3
-         d3                 = j3pos - i4pos; //3
-         d4                 = j4pos - i4pos; //3
-
-         loop2Internal( d1, d2, d3, d4, jScaledAtomicRadii, i4BornForceFactor, i4AtomicRadii, de );//156 : 366
-   
-         bornForce4.xyz    += de.x*d1; //3*2=6
-         bornForce4.xyz    += de.y*d2; //3*2=6
-         bornForce4.xyz    += de.z*d3; //3*2=6
-         bornForce4.xyz    += de.w*d4; //3*2=6
-
-         // ---------------------------------------------------------------------------------------
-
-         // j = 1
-
-         d1                 = j1pos - i1pos; //3
-         d2                 = j1pos - i2pos; //3
-         d3                 = j1pos - i3pos; //3
-         d4                 = j1pos - i4pos; //3
-   
-         loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j1BornForceFactor, j1AtomicRadii, de ); //112 : 304
-   
-         bornForce1.xyz    += de.x*d1; //3*2=6
-         bornForce2.xyz    += de.y*d2; //3*2=6
-         bornForce3.xyz    += de.z*d3; //3*2=6
-         bornForce4.xyz    += de.w*d4; //3*2=6
-
-         // ---------------------------------------------------------------------------------------
-
-         // j = 2
-
-         d1                 = j2pos - i1pos; //3
-         d2                 = j2pos - i2pos; //3
-         d3                 = j2pos - i3pos; //3
-         d4                 = j2pos - i4pos; //3
-   
-         loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j2BornForceFactor, j2AtomicRadii, de ); //112 : 304
-   
-         bornForce1.xyz    += de.x*d1; //3*2=6
-         bornForce2.xyz    += de.y*d2; //3*2=6
-         bornForce3.xyz    += de.z*d3; //3*2=6
-         bornForce4.xyz    += de.w*d4; //3*2=6
-
-         // ---------------------------------------------------------------------------------------
-
-         // j = 3
-
-         d1                 = j3pos - i1pos; //3
-         d2                 = j3pos - i2pos; //3
-         d3                 = j3pos - i3pos; //3
-         d4                 = j3pos - i4pos; //3
-   
-         loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j3BornForceFactor, j3AtomicRadii, de ); //112 : 304
-   
-         bornForce1.xyz    += de.x*d1; //3*2=6
-         bornForce2.xyz    += de.y*d2; //3*2=6
-         bornForce3.xyz    += de.z*d3; //3*2=6
-         bornForce4.xyz    += de.w*d4; //3*2=6
-
-         // ---------------------------------------------------------------------------------------
-
-         // j = 4
-
-         d1                 = j4pos - i1pos; //3
-         d2                 = j4pos - i2pos; //3
-         d3                 = j4pos - i3pos; //3
-         d4                 = j4pos - i4pos; //3
-   
-         loop2InternalNoSum( d1, d2, d3, d4, iScaledAtomicRadii, j4BornForceFactor, j4AtomicRadii, de ); //112 : 304
-   
-         bornForce1.xyz    += de.x*d1; //3*2=6
-         bornForce2.xyz    += de.y*d2; //3*2=6
-         bornForce3.xyz    += de.z*d3; //3*2=6
-         bornForce4.xyz    += de.w*d4; //3*2=6
-
-         // ---------------------------------------------------------------------------------------
-   
-         // increment linear index by 4   
-   
-         jLinind           += 4.0f;
-   
-      }
-      jAtom.y       += 1.0f;
-   }
-}
-
-kernel void kPostObcLoop2_nobranch( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-      float conversion,
-      float mergeNonObcForces,
-      float4 inObcForces<>, 
-      float3 nonObcForces<>, 
-      float4 pstream1[][], 
-      float4 pstream2[][], 
-      float4 pstream3[][], 
-      float4 pstream4[][], 
-      float  atomicRadii<>,
-      out float bornRadii<>,
-      out float obcChain<>,
-      out float3 outForces<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   float atomIndex, forceIndex, qIndex, qOff;
-   float2 pindex;
-   float i;
-   float sum2, sum3, bornSum, tanhSum, atomicRadiiOffset, obcIntermediate;
-   float4 o1,o2,o3,o4;
-   float4 tmp;
-
-   float2 iAtom; 
-   float4 forces;
-   float expPlus, expMinus;
-
-   // ---------------------------------------------------------------------------------------
-
-   // constants -- OBC Type II
-
-   const float alphaObc          = 1.0f;
-   const float betaObc           = 0.8f;
-   const float gammaObc          = 4.85f;
-   const float dielectricOffset  = 0.009f;
-
-   // ---------------------------------------------------------------------------------------
-   
-   // given atom index find force indices and streams
-
-   pindex      = indexof( outForces );
-   atomIndex   = pindex.x + pindex.y*atomStreamWidth;
-   forceIndex  = atomIndex;
-
-   // add current forces in inStream to forces stored in pstreams
-   // the .w entry is Born sum values; it will be used to calculate the
-   // Born radii and obcChain term
-
-   forces   = inObcForces;
-   forces.w = 0.0f;
-//forces   = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-   
-   // sum over j-loop 'duplications' by gathering from pstreams
-
-   for( i = 0.0f; i < repfac; i += 1.0f ){
-
-      // qIndex            = floor( forceIndex/iUnroll );
-      qIndex            = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
-
-      qOff              = forceIndex - iUnroll*qIndex;
-
-      // pindex.y          = floor( qIndex/ pStreamWidth );
-      pindex.y          = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
-
-      // pindex.x          = qIndex - pindex.y*pStreamWidth + qOff;
-      pindex.x          = qIndex - pindex.y*pStreamWidth;
-
-      o1 = pstream1[ pindex ];
-      o2 = pstream2[ pindex ];
-      o3 = pstream3[ pindex ];
-      o4 = pstream4[ pindex ];
-      
-      tmp = qOff < 0.5f ? o1 : o2;
-      tmp = qOff < 1.5f ? tmp : o3;
-      tmp = qOff < 2.5f ? tmp : o4;
-      
-      forces += tmp;
-        
-      forceIndex += roundNatoms;
-      
-   }
-
-   // compute Born radii and ObcChain
-
-   atomicRadiiOffset            = atomicRadii - dielectricOffset;
-
-   bornSum                      = forces.w;
-   bornSum                     *= 0.5f*atomicRadiiOffset;
-
-   sum2                         = bornSum*bornSum;
-   sum3                         = bornSum*sum2;
-
-   // Tanh does not exist? 
-   // calculate [ exp(x) - exp(-x) ]/[ exp(x) + exp(-x) ]
-
-   // tanhSum                      = tanh( bornSum - betaObc*sum2 + gammaObc*sum3 );
-
-   tanhSum                      = bornSum - betaObc*sum2 + gammaObc*sum3;
-   expPlus                      = exp( tanhSum );
-   expMinus                     = 1.0f/expPlus;
-   tanhSum                      = ( expPlus - expMinus )/( expPlus + expMinus );
-    
-   bornRadii                    = 1.0f/( (1.0f/(atomicRadiiOffset)) - tanhSum/atomicRadii );  
- 
-   obcIntermediate              = atomicRadiiOffset*( alphaObc - 2.0f*betaObc*bornSum + 3.0f*gammaObc*sum2 );
-   obcChain                     = (1.0f - tanhSum*tanhSum)*obcIntermediate/atomicRadii;
-   if( atomIndex >= natoms ){
-      bornRadii = 0.0f;
-      obcChain  = 0.0f;
-   }
-
-   // add converted new forces to non-Obc forces
-
-   outForces = conversion*forces.xyz;
-   if( mergeNonObcForces > 0.1f ){
-      outForces += nonObcForces; 
-   }
-
-}
--- a/platforms/brook/src/kernels/kgbsa.h
+++ b/platforms/brook/src/kernels/kgbsa.h
-#ifndef __KGBSA_H__
-#define __KGBSA_H__
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-void kMergeFloat4( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float iUnroll,
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream outputStream 
-                 );
-
-void kMergeFloat4_4( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream stream3, 
-      ::brook::stream stream4, 
-      ::brook::stream outputStream 
-                 );
-
-void kMergeFloat4_4X( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream stream3, 
-      ::brook::stream stream4, 
-      ::brook::stream outputStream 
-                 );
-
-void kCheck( 
-      const float natoms,
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float unroll,
-      ::brook::stream stream1
-                 );
-
-void kAddAndMergeFloat4( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float iUnroll,
-      ::brook::stream inStream, 
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream outputStream 
-                 );
-
-void kAddAndMergeFloat4_4( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      ::brook::stream inStream, 
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream stream3, 
-      ::brook::stream stream4, 
-      ::brook::stream outputStream 
-                 );
-
-void kPostObcLoop2( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      const float conversion,
-      const float mergeNonObcForces,
-      ::brook::stream inObcForces, 
-      ::brook::stream nonObcForces, 
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream stream3, 
-      ::brook::stream stream4, 
-      ::brook::stream atomicRadii,
-      ::brook::stream bornRadii,
-      ::brook::stream obcChain,
-      ::brook::stream outputStream 
-                 );
-
-
-void kPostObcLoop2_nobranch( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      const float conversion,
-      const float mergeNonObcForces,
-      ::brook::stream inObcForces, 
-      ::brook::stream nonObcForces, 
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream stream3, 
-      ::brook::stream stream4, 
-      ::brook::stream atomicRadii,
-      ::brook::stream bornRadii,
-      ::brook::stream obcChain,
-      ::brook::stream outputStream 
-                 );
-
-void kPostCalculateBornRadii_nobranch( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      const float conversion,
-      const float mergeNonObcForces,
-      ::brook::stream stream1, 
-      ::brook::stream atomicRadii,
-      ::brook::stream bornRadii,
-      ::brook::stream obcChain
-                 );
-
-void kPreGbsaForce2( 
-      ::brook::stream intermediateForceIn, 
-      ::brook::stream bornRadii, 
-      ::brook::stream bornRadii2Force 
-                 );
-
-void kPreObcForce2( 
-      ::brook::stream intermediateForceIn, 
-      ::brook::stream obcChainForceIn, 
-      ::brook::stream bornRadii, 
-      ::brook::stream bornRadii2Force 
-                 );
-
-void kPostObcLoop1( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream stream3, 
-      ::brook::stream stream4, 
-      ::brook::stream obcChainForceIn, 
-      ::brook::stream bornRadii, 
-      ::brook::stream outputStream, 
-      ::brook::stream bornRadii2Force 
-                 );
-
-
-void kPostObcLoop1_nobranch( 
-      const float repfac, 
-      const float atomStrWidth, 
-      const float pforceStrWidth,
-      const float natoms,
-      const float roundNatoms,
-      const float iUnroll,
-      ::brook::stream stream1, 
-      ::brook::stream stream2, 
-      ::brook::stream stream3, 
-      ::brook::stream stream4, 
-      ::brook::stream obcChainForceIn, 
-      ::brook::stream bornRadii, 
-      ::brook::stream outputStream, 
-      ::brook::stream bornRadii2Force 
-                 );
-
-void kCopyFloat4( 
-      ::brook::stream stream1, 
-      ::brook::stream stream2
-                 );
-
-void kObcLoop1(
-      const float natoms, 
-      const float roundedUpAtoms, 
-      const float dupfac, 
-      const float strwidth, 
-      const float fstrwidth,
-      const float soluteDielectric,
-      const float solventDielectric,
-      const float includeAce,
-      ::brook::stream posq, 
-      ::brook::stream bornRadii, 
-      ::brook::stream atomicRadii, 
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-      ::brook::stream bornForce3,
-      ::brook::stream bornForce4
-      );
-
-void kCalculateGbsaForce1_4 (
-      const float natoms, 
-      const float roundedUpAtoms, 
-      const float dupfac, 
-      const float strwidth, 
-      const float fstrwidth,
-      const float soluteDielectric,
-      const float solventDielectric,
-      ::brook::stream posq, 
-      ::brook::stream bornRadii, 
-      ::brook::stream nonpolarForce, 
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-      ::brook::stream bornForce3,
-      ::brook::stream bornForce4
-      );
-
-void kCalculateGbsaForce1_4X (
-      const float natoms, 
-      const float roundedUpAtoms, 
-      const float dupfac, 
-      const float strwidth, 
-      const float fstrwidth,
-      const float soluteDielectric,
-      const float solventDielectric,
-      ::brook::stream posq, 
-      ::brook::stream bornRadii, 
-      ::brook::stream nonpolarForce, 
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-      ::brook::stream bornForce3,
-      ::brook::stream bornForce4
-      );
-
-void kCalculateGbsaForce2_4 (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2
-      );
-
-void kCalculateGbsaForce2_4Debug (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      const float debugJAtom,
-      const float debugReport,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-		::brook::stream debugStream1,
-		::brook::stream debugStream2,
-		::brook::stream debugStream3,
-		::brook::stream debugStream4
-      );
-
-void kCalculateGbsaForce2_4Excl (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excl,
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2
-      );
-
-void kCalculateGbsaForce2_4ExclDebug (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      const float debugJAtom,
-      const float debugReport,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excl,
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-		::brook::stream debugStream1,
-		::brook::stream debugStream2,
-		::brook::stream debugStream3,
-		::brook::stream debugStream4
-      );
-
-void kCalculateGbsaForce2_2 (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2
-      );
-
-void kCalculateGbsaForce2_2Debug (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      const float debugJAtom,
-      const float debugReport,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-		::brook::stream debugStream1,
-		::brook::stream debugStream2,
-		::brook::stream debugStream3,
-		::brook::stream debugStream4
-      );
-
-void kCalculateGbsaForce2_2Excl (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excl,
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2
-      );
-
-void kCalculateGbsaForce2_2ExclDebug (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      const float debugJAtom,
-      const float debugReport,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excl,
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-		::brook::stream debugStream1,
-		::brook::stream debugStream2,
-		::brook::stream debugStream3,
-		::brook::stream debugStream4
-      );
-
-void kBornRadii(
-      ::brook::stream gpolNonBonded, 
-      ::brook::stream gPolFixed,
-      ::brook::stream bornRadii
-      );
-
-void kPostGbsaForce2Debug (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      const float debugJAtom,
-      const float debugReport,
-      ::brook::stream inBornForce2, 
-		::brook::stream gPolFixed,
-      ::brook::stream  bornRadii,
-		::brook::stream debugStream1,
-		::brook::stream debugStream2,
-		::brook::stream debugStream3,
-		::brook::stream debugStream4
-      );
-
-void kCalculateGbsaForce2_1_4 (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1
-      );
-
-void kCalculateGbsaForce2_1_4Debug (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      const float debugJAtom,
-      const float debugReport,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-		::brook::stream debugStream1,
-		::brook::stream debugStream2,
-		::brook::stream debugStream3,
-		::brook::stream debugStream4
-      );
-
-void kCalculateGbsaForce2_1_4Excl (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excl,
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1
-      );
-
-void kCalculateGbsaForce2_1_4ExclDebug (
-      float natoms, 
-      float dupfac, 
-      float strwidth, 
-      float fstrwidth,
-      const float debugIAtom,
-      const float debugJAtom,
-      const float debugReport,
-      ::brook::stream posq, 
-      ::brook::stream  bornRadii,
-      ::brook::stream vdwRadii, 
-      ::brook::stream volume, 
-      ::brook::stream excl,
-      ::brook::stream excludeBlockIndices,
-      ::brook::stream inBornForce,
-      ::brook::stream bornForce1,
-		::brook::stream debugStream1,
-		::brook::stream debugStream2,
-		::brook::stream debugStream3,
-		::brook::stream debugStream4
-      );
-
-void kObcLoop2(
-      float numberOfAtoms, 
-      float roundNatoms,
-      float duplicationFactor, 
-      float streamWidth, 
-      float fstreamWidth,
-      ::brook::stream posq, 
-      ::brook::stream scaledAtomicRadii, 
-      ::brook::stream bornForceFactor, 
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-      ::brook::stream bornForce3,
-      ::brook::stream bornForce4
-      );
-
-void kCalculateBornRadii(
-      float numberOfAtoms, 
-      float roundNatoms,
-      float duplicationFactor, 
-      float streamWidth, 
-      float fstreamWidth,
-      ::brook::stream posq, 
-      ::brook::stream scaledAtomicRadii, 
-      ::brook::stream bornForce1
-      );
-
-void kObcBornRadii(
-      float numberOfAtoms, 
-      float streamWidth, 
-      ::brook::stream bornForce, 
-      ::brook::stream atomicRadii, 
-      ::brook::stream bornRadii,
-      ::brook::stream obcChain
-      );
-
-void kAceNonPolar(
-      float soluteDielectric,
-      float solventDielectric,
-      ::brook::stream posq, 
-      ::brook::stream bornRadii, 
-      ::brook::stream vdwRadii, 
-      ::brook::stream bornForce
-      );
-
-void kCalculateBornRadii(
-      float numberOfAtoms, 
-      float roundNatoms,
-      float duplicationFactor, 
-      float streamWidth, 
-      float fstreamWidth,
-      ::brook::stream posq, 
-      ::brook::stream atomicRadii, 
-      ::brook::stream scaledAtomicRadii, 
-      ::brook::stream bornForceFactor, 
-      ::brook::stream bornForce1,
-      ::brook::stream bornForce2,
-      ::brook::stream bornForce3,
-      ::brook::stream bornForce4
-      );
-
-void kZeroFloat4( ::brook::stream bornForce );
-void kSetValue4( float value, ::brook::stream bornForce );
-void kSetValue3( float value, ::brook::stream bornForce );
-void kSetValue2( float value, ::brook::stream bornForce );
-void kSetValue1( float value, ::brook::stream bornForce );
-
-void kCopyFloat3To4( ::brook::stream inForce, ::brook::stream outForce );
-
-void kObcLoop2Cdlj4( float numberOfAtoms, float roundedUpAtoms, float duplicationFactor, 
-                     float streamWidth, float fstreamWidth,
-                     float jstreamWidth, float epsfac, 
-                     ::brook::stream posq, 
-                     ::brook::stream atomicRadii,
-                     ::brook::stream scaledAtomicRadii, 
-                     ::brook::stream bornForceFactor,
-                     ::brook::stream isigeps,
-                     ::brook::stream sigma,
-                     ::brook::stream epsilon,
-                     ::brook::stream excl,
-                     ::brook::stream bornForce1,
-                     ::brook::stream bornForce2,
-                     ::brook::stream bornForce3,
-                     ::brook::stream bornForce4 );
-
-#endif //__KGBSA_H__
--- a/platforms/brook/src/kernels/kinvmap_gather.br
+++ b/platforms/brook/src/kernels/kinvmap_gather.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-//Gather kernel for use with diferent angle, dihedral and improper functions
-//
-//For small systems, the overhead of calling the kernel is so high
-//That we have to minimize the number of kernel calls. At the same
-//time we don't want to waste reads, so I'm providing a bunch of kernels
-//here that are unrolled to diferent extents. Use the appropriate one
-//depending on how many inverse maps there are, rather than looping over
-//the simple one.
-
-
-//helper function to make the unrolling look better
-kernel float3 do_gather( float strwidth, float4 invmap<>, float3 forces[][] ) {
-	float3 f;
-	float4 quotient, remainder;
-	float2 idx;
-
-	f = float3( 0.0f, 0.0f, 0.0f );
-	
-	//Convert from linear to 2D index
-	// quotient  = floor( invmap / strwidth );
-	quotient  = round( ( invmap - fmod(invmap, strwidth))/strwidth ); 
-	remainder = invmap - quotient * strwidth;
-	
-	//Add each force only if non-negative
-	if ( invmap.x >= 0.0f ) {
-		idx.y = quotient.x;
-		idx.x = remainder.x;
-		f = forces[ idx ];
-	}
-
-	if ( invmap.y >= 0.0f ) {
-		idx.y = quotient.y;
-		idx.x = remainder.y;
-		f += forces[ idx ];
-	}
-	
-	if ( invmap.z >= 0.0f ) {
-		idx.y = quotient.z;
-		idx.x = remainder.z;
-		f += forces[ idx ];
-	}
-
-	if ( invmap.w >= 0.0f ) {
-		idx.y = quotient.w;
-		idx.x = remainder.w;
-		f += forces[ idx ];
-	}
-
-	return f;
-}
-
-//helper function to make the unrolling look better
-
-kernel float3 do_gather_nobranch( float strwidth, float4 invmap<>, float3 forces[][] ) {
-    
-    float3 f1, f2, f3, f4, f, z, t;    
-    float4 quotient, remainder;    
-    float2 idx;    
-    float m1, m2, m3, m4;    
-        
-    z = float3( 0.0f, 0.0f, 0.0f );    
-    f=z;    
-        
-    //Convert from linear to 2D index    
-    // quotient  = floor( invmap / strwidth );    
-    quotient  = round( ( invmap - fmod(invmap, strwidth))/strwidth );     
-    remainder = invmap - quotient * strwidth;    
-    
-    m1 = invmap.x;    
-    m2 = invmap.y;    
-    m3 = invmap.z;    
-    m4 = invmap.w;    
-    
-    //Add each force only if non-negative    
-    idx.y = quotient.x;    
-    idx.x = remainder.x;    
-    f1 = forces[ idx ];    
-    
-    idx.y = quotient.y;    
-    idx.x = remainder.y;    
-    f2 = forces[ idx ];
-    
-    idx.y = quotient.z;    
-    idx.x = remainder.z;    
-    f3 = forces[ idx ];
-    
-    idx.y = quotient.w;    
-    idx.x = remainder.w;    
-    f4 = forces[ idx ];
-    
-    f = (m1 >= 0.0f) ? f1 : z;    
-    t = (m2 >= 0.0f) ? f2 : z;    
-    f = f+t;    
-    t = (m3 >= 0.0f) ? f3 : z;    
-    f = f+t;    
-    t = (m4 >= 0.0f) ? f4 : z;    
-    f = f+t;
-    
-    return f;
-    
-}
-
-
-
-//Simple version, takes only one index stream
-kernel void kinvmap_gather(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap<>,   //indices into the dihedral forces
-		float3 forces[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap, forces );
-}
-
-//Takes two inverse maps
-kernel void kinvmap_gather2(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap1<>,   //indices into the dihedral forces
-		float4 invmap2<>,   //indices into the dihedral forces
-		float3 forces[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap1, forces );
-	outforce += do_gather_nobranch( strwidth, invmap2, forces );
-}
-
-//Takes three inverse maps
-kernel void kinvmap_gather3(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap1<>,   //indices into the dihedral forces
-		float4 invmap2<>,   //indices into the dihedral forces
-		float4 invmap3<>,
-		float3 forces[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap1, forces );
-	outforce += do_gather_nobranch( strwidth, invmap2, forces );
-	outforce += do_gather_nobranch( strwidth, invmap3, forces );
-}
-
-//Takes four inverse maps
-kernel void kinvmap_gather4(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap1<>,   //indices into the dihedral forces
-		float4 invmap2<>,   //indices into the dihedral forces
-		float4 invmap3<>,
-		float4 invmap4<>,
-		float3 forces[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap1, forces );
-	outforce += do_gather_nobranch( strwidth, invmap2, forces );
-	outforce += do_gather_nobranch( strwidth, invmap3, forces );
-	outforce += do_gather_nobranch( strwidth, invmap4, forces );
-}
-
-//Takes five inverse maps
-kernel void kinvmap_gather5(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap1<>,   //indices into the dihedral forces
-		float4 invmap2<>,   //indices into the dihedral forces
-		float4 invmap3<>,
-		float4 invmap4<>,
-		float4 invmap5<>,
-		float3 forces[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap1, forces );
-	outforce += do_gather_nobranch( strwidth, invmap2, forces );
-	outforce += do_gather_nobranch( strwidth, invmap3, forces );
-	outforce += do_gather_nobranch( strwidth, invmap4, forces );
-	outforce += do_gather_nobranch( strwidth, invmap5, forces );
-}
-
-//Takes six inverse maps - this is the last one!
-kernel void kinvmap_gather6(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap1<>,   //indices into the dihedral forces
-		float4 invmap2<>,   //indices into the dihedral forces
-		float4 invmap3<>,
-		float4 invmap4<>,
-		float4 invmap5<>,
-		float4 invmap6<>,
-		float3 forces[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap1, forces );
-	outforce += do_gather_nobranch( strwidth, invmap2, forces );
-	outforce += do_gather_nobranch( strwidth, invmap3, forces );
-	outforce += do_gather_nobranch( strwidth, invmap4, forces );
-	outforce += do_gather_nobranch( strwidth, invmap5, forces );
-	outforce += do_gather_nobranch( strwidth, invmap6, forces );
-}
-
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather2_1(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-}
-
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather2_2(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-}
-
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather2_3(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float4 invmap4_3<>,
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
-}
-
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather2_4(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float4 invmap4_3<>,
-		float4 invmap4_4<>,
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
-}
-
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather2_5(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float4 invmap4_3<>,
-		float4 invmap4_4<>,
-		float4 invmap4_5<>,
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_5, forces4 );
-}
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather3_1(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float4 invmap3_3<>,
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-}
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather3_2(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float4 invmap3_3<>,
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-}
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather3_3(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float4 invmap3_3<>,
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float4 invmap4_3<>,
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
-}
-
-//Takes three + four inverse maps
-kernel void kinvmap_gather3_4(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1<>,   //indices into the dihedral forces
-		float4 invmap3_2<>,   //indices into the dihedral forces
-		float4 invmap3_3<>,
-		float3 forces3[][], //dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float4 invmap4_3<>,
-		float4 invmap4_4<>,
-		float3 forces4[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2, forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_3, forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
-}
-
-//Takes three + five inverse maps
-kernel void kinvmap_gather3_5(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1[][],   //indices into the dihedral forces
-		float4 invmap3_2[][],   //indices into the dihedral forces
-		float4 invmap3_3[][],
-		float3 forces3[][], //dihedral forces
-		float4 invmap5_1[][],   //indices into the dihedral forces
-		float4 invmap5_2[][],   //indices into the dihedral forces
-		float4 invmap5_3[][],
-		float4 invmap5_4[][],
-		float4 invmap5_5[][],
-		float3 forces5[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	float2 idx = indexof(outforce);
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_2[idx], forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap3_3[idx], forces3 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_2[idx], forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_3[idx], forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_4[idx], forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_5[idx], forces5 );
-}
-
-
-//Takes one + one inverse maps
-kernel void kinvmap_gather1_1(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap3_1[][],   //indices into the dihedral forces
-		float3 forces3[][], //dihedral forces
-		float4 invmap5_1[][],   //indices into the dihedral forces
-		float3 forces5[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	float2 idx = indexof(outforce);
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 );
-	outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 );
-}
-
-
-//Takes five + two inverse maps
-kernel void kinvmap_gather5_2(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap5_1<>,   //indices into the dihedral forces
-		float4 invmap5_2<>,   //indices into the dihedral forces
-		float4 invmap5_3<>,
-		float4 invmap5_4<>,
-		float4 invmap5_5<>,
-		float3 forces5[][], //dihedral forces
-		float4 invmap2_1<>,   //indices into the dihedral forces
-		float4 invmap2_2<>,   //indices into the dihedral forces
-		float3 forces2[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_5, forces5 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
-	outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
-}
-
-
-//Takes five + two inverse maps
-kernel void kinvmap_gather5_3(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap5_1<>,   //indices into the dihedral forces
-		float4 invmap5_2<>,   //indices into the dihedral forces
-		float4 invmap5_3<>,
-		float4 invmap5_4<>,
-		float4 invmap5_5<>,
-		float3 forces5[][], //dihedral forces
-		float4 invmap2_1<>,   //indices into the dihedral forces
-		float4 invmap2_2<>,   //indices into the dihedral forces
-		float4 invmap2_3<>,   //indices into the dihedral forces
-		float3 forces2[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_5, forces5 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
-	outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
-	outforce += do_gather_nobranch( strwidth, invmap2_3, forces2 );
-}
-
-
-//Takes five + two inverse maps
-kernel void kinvmap_gather4_3(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap5_1<>,   //indices into the dihedral forces
-		float4 invmap5_2<>,   //indices into the dihedral forces
-		float4 invmap5_3<>,
-		float4 invmap5_4<>,
-		float3 forces5[][], //dihedral forces
-		float4 invmap2_1<>,   //indices into the dihedral forces
-		float4 invmap2_2<>,   //indices into the dihedral forces
-		float4 invmap2_3<>,   //indices into the dihedral forces
-		float3 forces2[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_2, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_3, forces5 );
-	outforce += do_gather_nobranch( strwidth, invmap5_4, forces5 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
-	outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
-	outforce += do_gather_nobranch( strwidth, invmap2_3, forces2 );
-}
-
-
-//Takes five + two inverse maps
-kernel void kinvmap_gather1_2(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap5_1<>,   //indices into the dihedral forces
-		float3 forces5[][], //dihedral forces
-		float4 invmap2_1<>,   //indices into the dihedral forces
-		float4 invmap2_2<>,   //indices into the dihedral forces
-		float3 forces2[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap5_1, forces5 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
-	outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
-}
-
-//Takes five + two inverse maps
-kernel void kinvmap_gather4_2(
-		float strwidth,    //stream width of the dihedral forces
-		float4 invmap4_1<>,   //indices into the dihedral forces
-		float4 invmap4_2<>,   //indices into the dihedral forces
-		float4 invmap4_3<>,
-		float4 invmap4_4<>,
-		float3 forces4[][], //dihedral forces
-		float4 invmap2_1<>,   //indices into the dihedral forces
-		float4 invmap2_2<>,   //indices into the dihedral forces
-		float3 forces2[][], //dihedral forces
-		float3 inforce<>,  //particle forces before
-		out float3 outforce<> //particle forces after
-		)
-{
-	outforce = inforce;
-
-	outforce += do_gather_nobranch( strwidth, invmap4_1, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_2, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_3, forces4 );
-	outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
-    
-	outforce += do_gather_nobranch( strwidth, invmap2_1, forces2 );
-	outforce += do_gather_nobranch( strwidth, invmap2_2, forces2 );
-}
-
-
-kernel float3 etch_force( 
-		float fpos, 
-		float strwidth,
-		float3 fi[][], 
-		float3 fj[][],
-		float3 fk[][],
-		float3 fl[][]
-		)
-{
-	float2 ind;
-	float _fpos;
-
-	_fpos = fpos;
-	
-	if ( _fpos > 300000.0f ) {
-		_fpos = _fpos - 300000.0f;
-		//ind.y = floor( _fpos / strwidth );
-      ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
-		ind.x = _fpos - ind.y * strwidth;
-
-		return fl[ ind ];
-	}
-	else if ( _fpos > 200000.0f ) {
-		_fpos = _fpos - 200000.0f;
-		//ind.y = floor( _fpos / strwidth );
-      ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
-		ind.x = _fpos - ind.y * strwidth;
-
-		return fk[ ind ];
-	}
-	else if ( _fpos > 100000.0f ) {
-		_fpos = _fpos - 100000.0f;
-		//ind.y = floor( _fpos / strwidth );
-      ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
-		ind.x = _fpos - ind.y * strwidth;
-
-		return fj[ ind ];
-	}
-	else if ( _fpos >= -0.5f ) {
-		//ind.y = floor( _fpos / strwidth );
-      ind.y = round( ( _fpos - fmod( _fpos, strwidth))/strwidth );
-		ind.x = _fpos - ind.y * strwidth;
-
-		return fi[ ind ];
-	}
-	else 
-		return 0.0f;
-	
-}
-
-
-//For-loop version doesn't work
-//Using a merged version of the above
-
-kernel float2 linear_to_2D( float linind, float width )
-{
-	float2 ind;
-	//ind.y = floor( linind / width );
-   ind.y = round( ( linind - fmod( linind, width))/width );
-	ind.x = linind - ind.y * width;
-
-	return ind;
-}
-
-//helper function to make the unrolling look better
-kernel float3 do_gather_merged_single( float strwidth, float invmap, 
-		float3 fi[][], float3 fj[][], float3 fk[][], float3 fl[][] ) {
-	
-	float3 f;
-	float2 idx;
-	float _invmap;
-	float n;
-
-	_invmap = invmap;
-
-	n = floor( _invmap / 100000.0f );
-	_invmap -= n * 100000.0f;
-	idx = linear_to_2D( _invmap, strwidth );
-
-	if ( n > 2.5f ) { 
-		f = fl[ idx ];
-	}
-	else if ( n > 1.5f ) {
-		f = fk[ idx ];
-	}
-	else if ( n > 0.5f ) {
-		f = fj[ idx ];
-	}
-	else if( n > -0.5f ) {
-		f = fi[ idx ];
-	}
-
-	return f;
-}
-
-kernel float3 do_gather_merged( float strwidth, float4 invmap,
-		float3 fi[][], float3 fj[][], float3 fk[][], float3 fl[][])
-{
-	float3 f;
-	f = do_gather_merged_single( strwidth, invmap.x, fi, fj, fk, fl )
-	    + do_gather_merged_single( strwidth, invmap.y, fi, fj, fk, fl )
-	    + do_gather_merged_single( strwidth, invmap.z, fi, fj, fk, fl )
-	    + do_gather_merged_single( strwidth, invmap.w, fi, fj, fk, fl );
-
-	return f;
-}
-
-kernel void kinvmap_gather_merged5(
-		float natoms,      //number of atoms
-		float strwidth,    //stream width of out-of-order forces
-		float4 invmap0<>, 
-		float4 invmap1<>, 
-		float4 invmap2<>, 
-		float4 invmap3<>, 
-		float4 invmap4<>, 
-		float3 fi[][],     //i-forces
-		float3 fj[][],     //j-forces
-		float3 fk[][],     //k-forces
-		float3 fl[][],     //l-forces
-		float3 inforce<>,  
-		out float3 outforce<>
-		)
-{
-	outforce = inforce;
-	outforce += do_gather_merged( strwidth, invmap0, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap1, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap2, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap3, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap4, fi, fj, fk, fl );
-}
-
-kernel void kinvmap_gather_merged9(
-		float natoms,      //number of atoms
-		float strwidth,    //stream width of out-of-order forces
-		float4 invmap0<>, 
-		float4 invmap1<>, 
-		float4 invmap2<>, 
-		float4 invmap3<>, 
-		float4 invmap4<>, 
-		float4 invmap5<>, 
-		float4 invmap6<>, 
-		float4 invmap7<>, 
-		float4 invmap8<>, 
-		float3 fi[][],     //i-forces
-		float3 fj[][],     //j-forces
-		float3 fk[][],     //k-forces
-		float3 fl[][],     //l-forces
-		float3 inforce<>,  
-		out float3 outforce<>
-		)
-{
-	outforce = inforce;
-	outforce += do_gather_merged( strwidth, invmap0, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap1, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap2, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap3, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap4, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap5, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap6, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap7, fi, fj, fk, fl )
-	            + do_gather_merged( strwidth, invmap8, fi, fj, fk, fl );
-}
--- a/platforms/brook/src/kernels/kinvmap_gather.h
+++ b/platforms/brook/src/kernels/kinvmap_gather.h
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-void  kinvmap_gather (const float  strwidth,
-		::brook::stream invmap,
-		::brook::stream forces,
-		::brook::stream inforce,
-		::brook::stream outforce); 
-
-void  kinvmap_gather2 (const float  strwidth,
-		::brook::stream invmap1,
-		::brook::stream invmap2,
-		::brook::stream forces,
-		::brook::stream inforce,
-		::brook::stream outforce); 
-
-void  kinvmap_gather3 (const float  strwidth,
-		::brook::stream invmap1,
-		::brook::stream invmap2,
-		::brook::stream invmap3,
-		::brook::stream forces,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather4 (const float  strwidth,
-		::brook::stream invmap1,
-		::brook::stream invmap2,
-		::brook::stream invmap3,
-		::brook::stream invmap4,
-		::brook::stream forces,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather5 (const float  strwidth,
-		::brook::stream invmap1,
-		::brook::stream invmap2,
-		::brook::stream invmap3,
-		::brook::stream invmap4,
-		::brook::stream invmap5,
-		::brook::stream forces,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather6 (const float  strwidth,
-		::brook::stream invmap1,
-		::brook::stream invmap2,
-		::brook::stream invmap3,
-		::brook::stream invmap4,
-		::brook::stream invmap5,
-		::brook::stream invmap6,
-		::brook::stream forces,
-		::brook::stream inforce,
-		::brook::stream outforce); 
-
-
-void  kinvmap_gather2_2 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather2_1 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather2_3 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream invmap4_3,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather1_2 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather2_4 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream invmap4_3,
-		::brook::stream invmap4_4,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather2_5 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream invmap4_3,
-		::brook::stream invmap4_4,
-		::brook::stream invmap4_5,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather3_2 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream invmap3_3,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather3_1 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream invmap3_3,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather3_3 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream invmap3_3,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream invmap4_3,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather3_4 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream invmap3_3,
-		::brook::stream forces3,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream invmap4_3,
-		::brook::stream invmap4_4,
-		::brook::stream forces4,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-void  kinvmap_gather3_5 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream invmap3_2,
-		::brook::stream invmap3_3,
-		::brook::stream forces3,
-		::brook::stream invmap5_1,
-		::brook::stream invmap5_2,
-		::brook::stream invmap5_3,
-		::brook::stream invmap5_4,
-		::brook::stream invmap5_5,
-		::brook::stream forces5,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-
-void  kinvmap_gather1_1 (const float  strwidth,
-		::brook::stream invmap3_1,
-		::brook::stream forces3,
-		::brook::stream invmap5_1,
-		::brook::stream forces5,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-
-void  kinvmap_gather5_2 (const float  strwidth,
-		::brook::stream invmap5_1,
-		::brook::stream invmap5_2,
-		::brook::stream invmap5_3,
-		::brook::stream invmap5_4,
-		::brook::stream invmap5_5,
-		::brook::stream forces5,
-		::brook::stream invmap2_1,
-		::brook::stream invmap2_2,
-		::brook::stream forces2,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-
-void  kinvmap_gather5_3 (const float  strwidth,
-		::brook::stream invmap5_1,
-		::brook::stream invmap5_2,
-		::brook::stream invmap5_3,
-		::brook::stream invmap5_4,
-		::brook::stream invmap5_5,
-		::brook::stream forces5,
-		::brook::stream invmap2_1,
-		::brook::stream invmap2_2,
-		::brook::stream invmap2_3,
-		::brook::stream forces2,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-
-void  kinvmap_gather4_3 (const float  strwidth,
-		::brook::stream invmap5_1,
-		::brook::stream invmap5_2,
-		::brook::stream invmap5_3,
-		::brook::stream invmap5_4,
-		::brook::stream forces5,
-		::brook::stream invmap2_1,
-		::brook::stream invmap2_2,
-		::brook::stream invmap2_3,
-		::brook::stream forces2,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-
-void  kinvmap_gather4_2 (const float  strwidth,
-		::brook::stream invmap4_1,
-		::brook::stream invmap4_2,
-		::brook::stream invmap4_3,
-		::brook::stream invmap4_4,
-		::brook::stream forces4,
-		::brook::stream invmap2_1,
-		::brook::stream invmap2_2,
-		::brook::stream forces2,
-		::brook::stream inforce,
-		::brook::stream outforce);
-
-
-void  kinvmap_gather_merged (const float  natoms,
-		const float  strwidth,
-		const float  istrwidth,
-		const float  fstrwidth,
-		::brook::stream nimap,
-		::brook::stream invmap,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl,
-		::brook::stream inforce,
-		::brook::stream outforce); 
-
-void  kinvmap_gather_merged9 (const float  natoms,
-		const float  strwidth,
-		::brook::stream invmap0,
-		::brook::stream invmap1,
-		::brook::stream invmap2,
-		::brook::stream invmap3,
-		::brook::stream invmap4,
-		::brook::stream invmap5,
-		::brook::stream invmap6,
-		::brook::stream invmap7,
-		::brook::stream invmap8,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl,
-		::brook::stream inforce,
-		::brook::stream outforce); 
-
-void  kinvmap_gather_merged5 (const float  natoms,
-		const float  strwidth,
-		::brook::stream invmap0,
-		::brook::stream invmap1,
-		::brook::stream invmap2,
-		::brook::stream invmap3,
-		::brook::stream invmap4,
-		::brook::stream fi,
-		::brook::stream fj,
-		::brook::stream fk,
-		::brook::stream fl,
-		::brook::stream inforce,
-		::brook::stream outforce); 
--- a/platforms/brook/src/kernels/kmerge.br
+++ b/platforms/brook/src/kernels/kmerge.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-/* After forces above, we have the forces for even numbered particles
- * in one stream, odd numbered particles in another.
- * In each stream, the forces are in several parts depending on how
- * many times we replicated the input stream.
- *
- * To avoid an extra kernel to zero forces, this sets the forces
- * rather than adding to it.
- * */
-
-kernel void kMergeFloat( 
-      float repfac, 
-      float atomStrWidth, 
-      float pstreamStrWidth,
-      float natoms,
-      float iUnroll,
-      iter float2 count<>, 
-      float pstream1[][], 
-      float pstream2[][], 
-      out float outstream<> )
-{
-   float linind;
-   float2 pindex;
-   float odd;
-   float i;
-   
-   //convert to linear atom index
-   linind = count.x + count.y * atomStrWidth;
-   
-   //If odd or even, we pick from diferent streams.
-   odd = linind - floor( linind / iUnroll ) * iUnroll;
-
-   //Now linear index is the index into partial_streams
-   linind = floor( linind / iUnroll );
-
-   outstream = 0.0f;
-   
-   //If we have predicated conditionals, we should 
-   //keep the conditional inside the loop
-
-   for ( i = 0; i < repfac; i+=1.0f ) {
-
-      pindex.y = floor( linind / pstreamStrWidth );
-      pindex.x = linind - pindex.y * pstreamStrWidth;
-      
-      if ( odd > 0.5f ) { //is odd
-         outstream += pstream2[ pindex ];
-      } else {
-         outstream += pstream1[ pindex ];
-      }
-      linind += natoms/iUnroll; 
-   }
-}
-
-kernel void kMergeFloat4( 
-      float repfac, 
-      float atomStrWidth, 
-      float pstreamStrWidth,
-      float natoms,
-      float iUnroll,
-      float4 pstream1[][], 
-      float4 pstream2[][], 
-      out float4 outstream<> )
-{
-   float linind;
-   float2 pindex;
-   float odd;
-   float i;
-   
-   //convert to linear atom index
-   linind = (indexof outstream).x + ( (indexof outstream).y * atomStrWidth );
-   
-   //If odd or even, we pick from diferent streams.
-   odd = linind - floor( linind / iUnroll ) * iUnroll;
-
-   //Now linear index is the index into partial_streams
-   linind = floor( linind / iUnroll );
-
-   outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-   
-   //If we have predicated conditionals, we should 
-   //keep the conditional inside the loop
-
-   for ( i = 0.0f; i < repfac; i+= 1.0f ) {
-
-      pindex.y = floor( linind / pstreamStrWidth );
-      pindex.x = linind - pindex.y * pstreamStrWidth;
-      
-      if ( odd > 0.5f ) { //is odd
-         outstream += pstream2[ pindex ];
-      } else {
-         outstream += pstream1[ pindex ];
-      }
-      linind += natoms/iUnroll; 
-   }
-}
-
-
-/* After forces above, we have the forces for even numbered particles
- * in one stream, odd numbered particles in another.
- * In each stream, the forces are in several parts depending on how
- * many times we replicated the input stream.
- *
- * To avoid an extra kernel to zero forces, this sets the forces
- * rather than adding to it.
- * */
-
-kernel void kMergeFloat4_4X( 
-      float repfac, 
-      float atomStrWidth, 
-      float pstreamStrWidth,
-      float natoms,
-      float iUnroll,
-      float4 pstream1[][], 
-      float4 pstream2[][], 
-      float4 pstream3[][], 
-      float4 pstream4[][], 
-      out float4 outstream<> )
-{
-   float linind;
-   float2 pindex;
-   float odd;
-   float i;
-   
-   //convert to linear atom index
-   linind = (indexof outstream).x + ( (indexof outstream).y * atomStrWidth );
-   
-   //If odd or even, we pick from diferent streams.
-   odd = linind - floor( linind / iUnroll ) * iUnroll;
-
-   //Now linear index is the index into partial_streams
-   linind = floor( linind / iUnroll );
-
-   outstream = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-   
-   //If we have predicated conditionals, we should 
-   //keep the conditional inside the loop
-
-   for ( i = 0.0f; i < repfac; i+= 1.0f ) {
-
-      //pindex.y = floor( linind / pstreamStrWidth );
-      //pindex.x = linind - pindex.y * pstreamStrWidth;
-      
-      pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth ); //bixia modify
-      pindex.x = linind - pindex.y * pstreamStrWidth;
-
-outstream += float4( linind, odd, pindex.x,  pindex.y );
-/*
-      if ( odd < 0.5f ) { //is odd
-         outstream += pstream1[ pindex ];
-      } else if( odd < 1.5f ){
-         outstream += pstream2[ pindex ];
-      } else if( odd < 2.5f ){
-         outstream += pstream3[ pindex ];
-      } else {
-         outstream += pstream4[ pindex ];
-      }
-*/
-
-      linind += natoms/iUnroll; 
-   }
-}
-
-kernel void kMergeFloat4_4( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-      float4 pstream1[][], 
-      float4 pstream2[][], 
-      float4 pstream3[][], 
-      float4 pstream4[][], 
-      out float4 outstream<> )
-{
-   float atomIndex, forceIndex, qIndex, qOff;
-   float2 pindex;
-   float i;
-   
-   // given atom index find force indices and streams
-
-   pindex      = indexof( outstream );
-   atomIndex   = pindex.x + pindex.y*atomStreamWidth;
-   forceIndex  = atomIndex;
-
-   outstream   = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-   
-   for( i = 0.0f; i < repfac; i += 1.0f ){
-
-      // qIndex            = floor( forceIndex/iUnroll );
-      qIndex            = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
-
-      qOff              = forceIndex - iUnroll*qIndex;
-
-      // pindex.y          = floor( qIndex/ pStreamWidth );
-      pindex.y          = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
-
-      // pindex.x          = qIndex - pindex.y*pStreamWidth + qOff;
-      pindex.x          = qIndex - pindex.y*pStreamWidth;
-
-// outstream += float4( forceIndex, qIndex, pindex.x,  pindex.y );
-
-      if ( qOff < 0.5f ){ 
-         outstream += pstream1[ pindex ];
-      } else if( qOff < 1.5f ){
-         outstream += pstream2[ pindex ];
-      } else if( qOff < 2.5f ){
-         outstream += pstream3[ pindex ];
-      } else {
-         outstream += pstream4[ pindex ];
-      }
-
-      forceIndex += roundNatoms;
-   }
-}
-
-kernel void kSetValue4( float value, out float4 outstream<> ){
- outstream = float4( value, value, value, value );
-}
-
-kernel void kSetValue3( float value, out float3 outstream<> ){
- outstream = float3( value, value, value );
-}
-
-kernel void kSetValue2( float value, out float2 outstream<> ){
- outstream = float2( value, value );
-}
-
-kernel void kSetValue1( float value, out float outstream<> ){
- outstream = value;
-}
-
-kernel void kCheck( float natoms, float atomStrWidth, float pstreamStrWidth, float unroll, out float4 outstream<> )
-{
-
- float linind, forceIndex, atomIndex;
- float2 pindex;
-
- pindex     = indexof( outstream );
- forceIndex = unroll*(pindex.x + pindex.y*pstreamStrWidth);
- atomIndex  = fmod( forceIndex, natoms );
- outstream  = float4( pindex.x, pindex.y, forceIndex, atomIndex );
-   
-}
-
-/* After forces above, we have the forces for even numbered particles
- * in one stream, odd numbered particles in another.
- * In each stream, the forces are in several parts depending on how
- * many times we replicated the input stream.
- *
- * To avoid an extra kernel to zero forces, this sets the forces
- * rather than adding to it.
- * */
-
-kernel void kAddAndMergeFloat4( 
-      float repfac, 
-      float atomStrWidth, 
-      float pstreamStrWidth,
-      float natoms,
-      float iUnroll,
-      float4 inStream<>, 
-      float4 pstream1[][], 
-      float4 pstream2[][], 
-      out float4 outstream<> )
-{
-   float linind;
-   float2 pindex;
-   float odd;
-   float i;
-   float floor_linind_iUnroll;
-   
-   linind = (indexof outstream).x + (indexof outstream).y * atomStrWidth;
-   
-   //If odd or even, we pick from diferent streams.
-   //odd = linind - floor( linind / iUnroll ) * iUnroll;
-
-   //Now linear index is the index into partial_streams
-   //linind = floor( linind / iUnroll );
-   
-   floor_linind_iUnroll = round( (linind - fmod(linind, iUnroll))/iUnroll );
-   odd = linind - floor_linind_iUnroll * iUnroll;//bixia modify
-   linind = floor_linind_iUnroll; //bixia modify
-
-   outstream   = inStream;
-   outstream.w = 0.0f;
-   
-   //If we have predicated conditionals, we should 
-   //keep the conditional inside the loop
-
-   for ( i = 0.0f; i < repfac; i+= 1.0f ) {
-
-      //pindex.y = floor( linind / pstreamStrWidth );
-      pindex.y = round( (linind - fmod( linind, pstreamStrWidth ))/pstreamStrWidth ); //bixia modify
-      
-      pindex.x = linind - pindex.y * pstreamStrWidth;
-      
-      if ( odd > 0.5f ) { //is odd
-         outstream += pstream2[ pindex ];
-      } else {
-         outstream += pstream1[ pindex ];
-      }
-      linind += natoms/iUnroll; 
-   }
-}
-
-kernel void kAddAndMergeFloat4_4( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-      float4 inStream<>, 
-      float4 pstream1[][], 
-      float4 pstream2[][], 
-      float4 pstream3[][], 
-      float4 pstream4[][], 
-      out float4 outstream<> ){
-
-   float atomIndex, forceIndex, qIndex, qOff;
-   float2 pindex;
-   float i;
-   
-   // given atom index find force indices and streams
-
-   pindex      = indexof( outstream );
-   atomIndex   = pindex.x + pindex.y*atomStreamWidth;
-   forceIndex  = atomIndex;
-
-   // add current forces in inStream to forces stored in pstreams
-   // the .w entry is Born sum values; it will be used to calculate the
-   // Born radii and obcChain term
-
-   outstream   = inStream;
-   outstream.w = 0.0f;
-//outstream   = float4( 0.0f, 0.0f, 0.0f, 0.0f );
-   
-   // sum over j-loop 'duplications' by gathering from pstreams
-
-   for( i = 0.0f; i < repfac; i += 1.0f ){
-
-      // qIndex            = floor( forceIndex/iUnroll );
-      qIndex            = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
-
-      qOff              = forceIndex - iUnroll*qIndex;
-
-      // pindex.y          = floor( qIndex/ pStreamWidth );
-      pindex.y          = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
-
-      // pindex.x          = qIndex - pindex.y*pStreamWidth + qOff;
-      pindex.x          = qIndex - pindex.y*pStreamWidth;
-
-      if( qOff < 0.5f ){ 
-         outstream += pstream1[ pindex ];
-      } else if( qOff < 1.5f ){
-         outstream += pstream2[ pindex ];
-      } else if( qOff < 2.5f ){
-         outstream += pstream3[ pindex ];
-      } else {
-         outstream += pstream4[ pindex ];
-      }
-      forceIndex += roundNatoms;
-   }
-}
-
-/* Add forces from two streams */ 
-
-kernel void kAddForces3_4( float conversion, float3 force1<>, float4 force2<>, out float3 outForce<> ){
-   outForce.xyz  = force1 + conversion*force2.xyz;
-}
-
-/* Copy one stream to another */
-
-kernel void kCopyFloat4( float4 inForce<>, out float4 outForce<> ){
-   outForce = inForce;
-}
-
-/* Copy one stream to another
- * */
-
-kernel void kCopyFloat3To4( 
-      float3 inForce<>, 
-      out float4 outForce<> ){
-   // ---------------------------------------------------------------------------------------
-
-   outForce.xyz = inForce;
-   outForce.w   = 0.0f;
-}
--- a/platforms/brook/src/kernels/kmerge_partial_forces.br
+++ b/platforms/brook/src/kernels/kmerge_partial_forces.br
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-/* After forces above, we have the forces for even numbered particles
- * in one stream, odd numbered particles in another.
- * In each stream, the forces are in several parts depending on how
- * many times we replicated the input stream.
- *
- * This kernel should not be a bottle neck, but if it turns out to
- * be so, we can try some work arounds.
- *
- * To avoid an extra kernel to zero forces, this sets the forces
- * rather than adding to it.
- * */
-
-kernel static void kmerge_partial_forces( 
-		float repfac, 
-		float atomStrWidth, 
-		float pforceStrWidth,
-		float natoms,
-		float3 pforce1[][], 
-		float3 pforce2[][], 
-		out float3 force<> )
-{
-	float linind;
-	float2 pindex;
-	float odd;
-	float i;
-	
-	float2 adjustcount = (indexof force);
-	
-	//convert to linear atom index
-	linind = adjustcount.x + adjustcount.y * atomStrWidth;
-
-	//If odd or even, we pick from diferent streams.
-	odd = linind - floor( linind / 2.0f ) * 2.0f;
-
-	//Now linear index is the index into partial_forces
-	linind = floor( linind / 2.0f );
-	force = float3( 0.0f, 0.0f, 0.0f );
-
-	//If we have predicated conditionals, we should 
-	//keep the conditional inside the loop
-	for ( i = 0; i < repfac; i+=1.0f ) {
-		pindex.y = floor( linind / pforceStrWidth );		
-		pindex.x = linind - pindex.y * pforceStrWidth;
-		//pindex.x = fmod( linind, pforceStrWidth );
-		
-		if ( odd > 0.5f ) { //is odd
-			force += pforce2[ pindex ];
-		}
-		else {
-			force += pforce1[ pindex ];
-		}
-		linind += natoms/2.0f; 
-	}
-}
-
-kernel void kMergeFloat3_4( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-      float3 pstream1[][], 
-      float3 pstream2[][], 
-      float3 pstream3[][], 
-      float3 pstream4[][], 
-      out float3 outstream<> )
-{
-   float atomIndex, forceIndex, qIndex, qOff;
-   float2 pindex;
-   float i;
-   
-   // given atom index find force indices and streams
-
-   pindex      = indexof( outstream );
-   atomIndex   = pindex.x + pindex.y*atomStreamWidth;
-   forceIndex  = atomIndex;
-
-   outstream   = float3( 0.0f, 0.0f, 0.0f );
-   
-   for( i = 0.0f; i < repfac; i += 1.0f ){
-
-      qIndex            = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
-      qOff              = forceIndex - iUnroll*qIndex;
-      pindex.y          = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
-      pindex.x          = qIndex - pindex.y*pStreamWidth;
-
-      if ( qOff < 0.5f ){ 
-         outstream += pstream1[ pindex ];
-      } else if( qOff < 1.5f ){
-         outstream += pstream2[ pindex ];
-      } else if( qOff < 2.5f ){
-         outstream += pstream3[ pindex ];
-      } else {
-         outstream += pstream4[ pindex ];
-      }
-
-      forceIndex += roundNatoms;
-   }
-}
-
-
-kernel void kMergeFloat3_4_nobranch( 
-      float repfac, 
-      float atomStreamWidth, 
-      float pStreamWidth,
-      float natoms,
-      float roundNatoms,
-      float iUnroll,
-      float3 pstream1[][], 
-      float3 pstream2[][], 
-      float3 pstream3[][], 
-      float3 pstream4[][], 
-      out float3 outstream<> )
-{
-   float atomIndex, forceIndex, qIndex, qOff;
-   float2 pindex;
-   float i;
-   float3 o1,o2,o3,o4;
-   float3 tmp;
-   
-   // given atom index find force indices and streams
-
-   pindex      = indexof( outstream );
-   atomIndex   = pindex.x + pindex.y*atomStreamWidth;
-   forceIndex  = atomIndex;
-
-   outstream   = float3( 0.0f, 0.0f, 0.0f );
-   
-   for( i = 0.0f; i < repfac; i += 1.0f ){
-
-      qIndex            = round( (forceIndex - fmod( forceIndex, iUnroll))/iUnroll );
-      qOff              = forceIndex - iUnroll*qIndex;
-      pindex.y          = round( (qIndex - fmod( qIndex, pStreamWidth ))/pStreamWidth );
-      pindex.x          = qIndex - pindex.y*pStreamWidth;
-
-      o1 = pstream1[ pindex ];
-      o2 = pstream2[ pindex ];
-      o3 = pstream3[ pindex ];
-      o4 = pstream4[ pindex ];
-      
-      tmp = qOff < 0.5f ? o1 : o2;
-      tmp = qOff < 1.5f ? tmp : o3;
-      tmp = qOff < 2.5f ? tmp : o4;
-
-      outstream += tmp;
-      
-      forceIndex += roundNatoms;
-   }
-}
-
--- a/platforms/brook/src/kernels/kshakeh.br
+++ b/platforms/brook/src/kernels/kshakeh.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-/*
- * This kernel constrains bonds involving hydrogen using an unrolled
- * shake implementation. We assume that a heavy atom can have at most 
- * 3 hydrogens attached. The hydrogens are expected to be identical
- *
- * For atoms that have ewer than 3 hydrogens
- *
- * Also we don't check the constraints!
- *  
- * We do a fixed number of iterations without checking for convergence
- * to avoid stalling
- * *************************************************************/
-
-/* Fix for precision of terms first order in dt
- * To be used with corresponding update kernels
- * The input posqp is now changes to posq rather than
- * posq+changes
- * */
-kernel void
-kshakeh_fix1(
-		float maxIterations,      //number of iterations
-		float strwidth, //stream width of posq
-		float inputTolerance,    //tolerance
-		float4 atoms<>, //heavy0, h1, h2, h3
-		float3 posq[][], //positions before update
-		float3 posqp[][], //changes to positions
-		float4 params<>, // (1/m0, mu/2, blensq, 1/m1 )
-		out float3 cposq0<>, //constrained position for heavy atom
-		out float3 cposq1<>, //ditto for h1
-		out float3 cposq2<>, //ditto for h2
-		out float3 cposq3<> //ditto for h3
-		) {
-
-	float2 ai, aj1, aj2, aj3; //2d indices, can be precalc.
-	float i; //iteration count
-	float3 xi, xj1, xj2, xj3; //coordinates
-	float3 xpi, xpj1, xpj2, xpj3; //coordinates
-	float3 rij1, rij2, rij3, rpij, dr;
-	float rpsqij, rrpr, acor, tolerance, converged;
-	float mask2, mask3;
-	float diff;
-	float rij1sq, rij2sq, rij3sq;
-	float ld1, ld2, ld3;
-	
-	// ai.y  = floor( atoms.x / strwidth );
-   ai.y  = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
-	ai.x  = atoms.x - ai.y * strwidth;
-
-	// aj1.y = floor( atoms.y / strwidth );
-   aj1.y  = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
-	aj1.x = atoms.y - aj1.y * strwidth;
-
-	//If further hydrogens are absent,
-	//just set to the coordinates of the first
-	//so we don't have any junk memory accesses
-	//or nans/infs in the calcs.	
-
-	if( atoms.z > -0.5f ){
-		aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
-		aj2.x = atoms.z - aj2.y * strwidth;
-		mask2 = 1.0f;
-	} else {
-		aj2   = aj1; 
-		mask2 = 0.0f;
-	}
-
-	if( atoms.w > -0.5f ){	
-		aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
-		aj3.x = atoms.w - aj3.y * strwidth;
-		mask3 = 1.0f;
-	} else {
-		aj3   = aj1;
-		mask3 = 0.0f;
-	}
-
-	cposq0       = posq[ai];
-	cposq1       = posq[aj1];
-	cposq2       = posq[aj2];
-	cposq3       = posq[aj3];
-	
-	xi           = cposq0;
-	xj1          = cposq1;
-	xj2          = cposq2;
-	xj3          = cposq3;
-	
-	rij1         = xi - xj1;
-	rij2         = xi - xj2;
-	rij3         = xi - xj3;
-
-	rij1sq       = dot( rij1, rij1 );
-	rij2sq       = dot( rij2, rij2 );
-	rij3sq       = dot( rij3, rij3 );
-
-	ld1          = params.z - rij1sq;
-	ld2          = params.z - rij2sq;
-	ld3          = params.z - rij3sq;
-
-	xpi          = posqp[ai];
-	xpj1         = posqp[aj1];
-	xpj2         = posqp[aj2];
-	xpj3         = posqp[aj3];
-
-
-	i           = 0.0f;
-   converged   = 1.0f;
-   tolerance   = 2.0f*inputTolerance;
-	while( i < maxIterations && converged > 0.0f ){
-
-		//First hydrogen
-
-		rpij       = xpi - xpj1; //This is really rpij - rij
-		rpsqij     = dot( rpij, rpij ); //This is really deltar ^ 2
-		rrpr       = dot( rij1, rpij ); //This is r.deltar
-      acor       = ( ld1 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ; 
-      diff       = abs( ld1 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
-      acor       = (diff < 1.0f) ? 0.0f : acor;
-      converged  = abs( acor );
-
-		dr         = rij1 * acor;
-		xpi       += dr * params.x;
-		xpj1      -= dr * params.w;
-
-		//Second hydrogen
-
-		rpij       = xpi - xpj2;
-		rpsqij     = dot( rpij, rpij );
-		rrpr       = dot( rij2, rpij );
-
-      diff       = abs( ld2 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
-      acor       = mask2 * ( ld2 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ; 
-      acor       = (diff < 1.0f) ? 0.0f : acor;
-      converged += abs( acor );
-
-		dr         = rij2 * acor;
-		xpi       += dr * params.x;
-		xpj2      -= dr * params.w;
-
-		//Third hydrogen
-
-		rpij       = xpi - xpj3;
-		rpsqij     = dot( rpij, rpij );
-		rrpr       = dot( rij3, rpij );
-      diff       = abs( ld3 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
-      acor       = mask3 * ( ld3 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
-      acor       = (diff < 1.0f) ? 0.0f : acor;
-      converged += abs( acor );
-
-		dr         = rij3 * acor;
-		xpi       += dr * params.x;
-		xpj3      -= dr * params.w;
-		
-		i += 1.0f;
-	}
-
-	//Output modified delta's
-
-	cposq0 = xpi;
-	cposq1 = xpj1;
-	cposq2 = xpj2;
-	cposq3 = xpj3;
-
-}
-
-kernel void kshakeh_update1_fix1( 
-		float strwidth, //width of cposq streams
-		float2 invmap<>,    //shakeh inverse map
-		float3 posqp<>,    //deltas from sd2
-		float3 cposq0[][], //constrained delta for heavy atom
-		float3 cposq1[][], //ditto for h1
-		float3 cposq2[][], //ditto for h2
-		float3 cposq3[][], //ditto for h3
-		out float3 oposq<> //updated deltas
-		){
-
-	float2 atom;
-	
-   atom.y  = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
-	atom.x  = invmap.x - atom.y * strwidth;
-	
-	if( invmap.y < 0 ){
-		oposq  = posqp;
-	} else if( invmap.y < 0.5f ){
-		oposq  = cposq0[ atom ];
-	} else if( invmap.y < 1.5f ){
-		oposq  = cposq1[ atom ];
-	} else if( invmap.y < 2.5f ){
-		oposq  = cposq2[ atom ];
-	} else if( invmap.y < 3.5f ){
-		oposq  = cposq3[ atom ];
-   }
-
-}
-
-kernel void kshakeh_update2_fix1( 
-		float strwidth, //width of cposq streams
-		float2 invmap<>,    //shakeh inverse map
-		float3 posq<>,     //old positions
-		float3 posqp<>,    //deltas from sd2
-		float3 cposq0[][], //constrained delta for heavy atom
-		float3 cposq1[][], //ditto for h1
-		float3 cposq2[][], //ditto for h2
-		float3 cposq3[][], //ditto for h3
-		out float3 oposq<> //updated deltas
-		){
-
-	float2 atom;
-	
-   atom.y  = round( (invmap.x - fmod( invmap.x, strwidth ))/strwidth );
-	atom.x  = invmap.x - atom.y * strwidth;
-	
-	if ( invmap.y < 0 ){
-		oposq = posqp;
-	} else if ( invmap.y < 0.5f ){ 
-		oposq = cposq0[ atom ];
-	} else if ( invmap.y < 1.5f){
-		oposq = cposq1[ atom ];
-	} else if ( invmap.y < 2.5f ){
-		oposq = cposq2[ atom ];
-	} else if ( invmap.y < 3.5f ){
-		oposq = cposq3[ atom ];
-   }
-
-	oposq += posq;
-}
--- a/platforms/brook/src/kernels/kshakeh.h
+++ b/platforms/brook/src/kernels/kshakeh.h
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-void  kshakeh_fix1 (
-      const float  maxIterations,
-		const float  strwidth,
-		const float  tolerance,
-		::brook::stream atoms,
-		::brook::stream posq,
-		::brook::stream posqp,
-		::brook::stream params,
-		::brook::stream cposq0,
-		::brook::stream cposq1,
-		::brook::stream cposq2,
-		::brook::stream cposq3);
-
-void  kshakeh_update1_fix1 (
-      const float  strwidth,
-		::brook::stream invmap,
-		::brook::stream posqp,
-		::brook::stream cposq0,
-		::brook::stream cposq1,
-		::brook::stream cposq2,
-		::brook::stream cposq3,
-		::brook::stream oposq); 
-
-void  kshakeh_update2_fix1 (
-      const float  strwidth,
-		::brook::stream invmap,
-		::brook::stream posq,
-		::brook::stream posqp,
-		::brook::stream cposq0,
-		::brook::stream cposq1,
-		::brook::stream cposq2,
-		::brook::stream cposq3,
-		::brook::stream oposq); 
--- a/platforms/brook/src/kernels/kupdatebd.br
+++ b/platforms/brook/src/kernels/kupdatebd.br
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-/*
- * Brownian dynamics integration
- *
- * @param xstrwidth      atom stream width
- * @param gstrwidth      Gaussian stream width
- * @param goffset        Gaussian offset into stream
- * @param forceScale     force scale factor
- * @param noiseAmplitude noise amplitude
- * @param fgauss         random numbers
- * @param pos            atom positions
- * @param force          force
- * @param posp           delta positions
- *
- **/
-
-kernel void kintegrate_bd( float xstrwidth, float gstrwidth, float goffset, 
-                           float forceScale, float noiseAmplitude, float3 fgauss[][],
-                           float3 pos<>, float3 force<>, out float3 posp<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   float3 fg1;
-   float linind_gauss; //linear index into the random numbers
-   float2 igauss;      //2d index
-
-   // ---------------------------------------------------------------------------------------
-
-   igauss       = indexof( pos );
-   linind_gauss = igauss.y * xstrwidth + igauss.x;
-   linind_gauss = linind_gauss + goffset;    
-   
-   igauss.y     = round( (linind_gauss - fmod( linind_gauss, gstrwidth))/gstrwidth);
-   igauss.x     = linind_gauss - igauss.y * gstrwidth;
-
-   fg1          = fgauss[ igauss ];
-
-   posp         = forceScale*force + noiseAmplitude*fg1;
-}
-
-/*
- * Brownian dynamics update
- *
- * @param velocityScale  velocity scale
- * @param posp           atom positions
- * @param posIn          atom positions
- * @param velocity       velocity
- * @param posp           delta positions
- *
-    rfac     = sqrt(2.0*BOLTZ*temp/(fr*dt));
-    invfr    = 1.0/fr;
- vn          = invfr*f[n][d] + rfac*fgauss(&jran);
-v[n][d]      = vn;
-xprime[n][d] = x[n][d]+vn*dt;
- **/
-
-kernel void kupdate_bd( float velocityScale,
-                        float3 posp<>, float3 posIn<>, out float3 velocity<>, out float3 posOut<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   // ---------------------------------------------------------------------------------------
-
-   velocity    = velocityScale*( posp - posIn );
-   posOut      = posp;
-}
-
-/*
- * Brownian dynamics update (no Shake)
- *
- * @param velocityScale  velocity scale
- * @param posp           atom positions
- * @param posIn          atom positions
- * @param velocity       velocity
- * @param posp           delta positions
- *
- **/
-
-kernel void kupdate_bd2( float velocityScale,
-                         float3 posp<>, float3 posIn<>, out float3 velocity<>, out float3 posOut<> ){
-
-   // ---------------------------------------------------------------------------------------
-
-   // ---------------------------------------------------------------------------------------
-
-   velocity    = velocityScale*posp;
-   posOut      = posIn + posp;
-}
-
--- a/platforms/brook/src/kernels/kupdatebd.h
+++ b/platforms/brook/src/kernels/kupdatebd.h
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs, Mike Houston                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-
-/*
- * Brownian dynamics integration
- *
- * @param xstrwidth      atom stream width
- * @param gstrwidth      Gaussian stream width
- * @param goffset        Gaussian offset into stream
- * @param forceScale     force scale factor
- * @param noiseAmplitude noise amplitude
- * @param fgauss         random numbers
- * @param pos            atom positions
- * @param force          force
- * @param posp           delta positions
- *
- **/
- 
-void kintegrate_bd( const float xstrwidth, const float gstrwidth, const float goffset,
-                    const float forceScale, const float noiseAmplitude, 
-                    ::brook::stream fgauss, ::brook::stream posq, 
-                    ::brook::stream force, ::brook::stream posp );
-
-/*
- * Brownian dynamics update
- *
- * @param velocityScale  velocity scale
- * @param posp           atom positions
- * @param posIn          atom positions
- * @param velocity       velocity
- * @param posp           delta positions
- *
- **/
-
-void kupdate_bd( const float velocityScale, ::brook::stream posp, ::brook::stream posIn, 
-                 ::brook::stream velocity, ::brook::stream posOut );
-
-
-/*
- * Brownian dynamics update
- *
- * @param velocityScale  velocity scale
- * @param posp           atom positions
- * @param velocity       velocity
- * @param posp           delta positions
- *
- **/
-
-void kupdate_bd2( const float velocityScale, ::brook::stream posp,
-                  ::brook::stream posIn, ::brook::stream velocity, ::brook::stream posOut );
-
-