Merge remote-tracking branch 'upstream/master'

bf6d95c2 · John Chodera (MSKCC) · 896413aa · 227c86bf · bf6d95c2 · bf6d95c2
Commit bf6d95c2 authored Jun 05, 2015 by John Chodera (MSKCC)
20 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,20 +81,6 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})
 SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 platforms/reference serialization libraries/validate libraries/irrxml)
 IF(WIN32)
    SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/pthreads)
-    ADD_CUSTOM_TARGET(PthreadsLibraries ALL)
-    FILE(GLOB PTHREADS_LIBRARIES libraries/pthreads/lib/*.dll libraries/pthreads/lib/*.lib)
-	ADD_CUSTOM_COMMAND(TARGET PthreadsLibraries COMMAND  ${CMAKE_COMMAND} ARGS -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/")
-    FOREACH(lib ${PTHREADS_LIBRARIES})
-        ADD_CUSTOM_COMMAND(TARGET PthreadsLibraries COMMAND ${CMAKE_COMMAND} ARGS -E copy ${lib} "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/")
-        INSTALL(FILES ${lib} DESTINATION "lib/")
-    ENDFOREACH(lib)
-    LINK_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}")
-    IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
-        SET(PTHREADS_LIB pthreadVC2_x64)
-    ELSE(CMAKE_SIZEOF_VOID_P EQUAL 8)
-        SET(PTHREADS_LIB pthreadVC2)
-        SET(PTHREADS_LIB_STATIC pthreadVC2_static_mt)
-    ENDIF(CMAKE_SIZEOF_VOID_P EQUAL 8)
 ELSE(WIN32)
    IF (NOT ANDROID)
        SET(PTHREADS_LIB pthread)
@@ -303,17 +289,14 @@ ENDIF (CMAKE_SYSTEM_NAME MATCHES "Linux")
 IF(OPENMM_BUILD_SHARED_LIB)
    ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
-    SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_SHARED_LIBRARY -DLEPTON_BUILDING_SHARED_LIBRARY -DOPENMM_VALIDATE_BUILDING_SHARED_LIBRARY")
+    SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_SHARED_LIBRARY -DLEPTON_BUILDING_SHARED_LIBRARY -DOPENMM_VALIDATE_BUILDING_SHARED_LIBRARY -DPTHREAD_BUILDING_SHARED_LIBRARY")
-    IF(WIN32)
-        ADD_DEPENDENCIES(${SHARED_TARGET} PthreadsLibraries)
-    ENDIF(WIN32)
 ENDIF(OPENMM_BUILD_SHARED_LIB)
 SET(OPENMM_BUILD_STATIC_LIB OFF CACHE BOOL "Whether to build static OpenMM libraries")
 IF(OPENMM_BUILD_STATIC_LIB)
    ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
    SET(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_USE_STATIC_LIBRARIES -DLEPTON_USE_STATIC_LIBRARIES -DPTW32_STATIC_LIB")
-    SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_STATIC_LIBRARY -DLEPTON_BUILDING_STATIC_LIBRARY -DOPENMMM_VALIDATE_BUILDING_STATIC_LIBRARY -DOPENMM_VALIDATE_BUILDING_STATIC_LIBRARY")
+    # Compile every source of the static target with the *_BUILDING_STATIC_LIBRARY definitions
+    # (OpenMM, Lepton, validate, pthread) on top of the shared EXTRA_COMPILE_FLAGS.
+    # NOTE(review): the misspelled OPENMMM_VALIDATE_BUILDING_STATIC_LIBRARY define is dropped here;
+    # confirm that no source tests the three-M spelling before merging.
+    SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_BUILDING_STATIC_LIBRARY -DLEPTON_BUILDING_STATIC_LIBRARY -DOPENMM_VALIDATE_BUILDING_STATIC_LIBRARY -DPTHREAD_BUILDING_STATIC_LIBRARY")
 ENDIF(OPENMM_BUILD_STATIC_LIB)
 IF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)

--- a/cmake_modules/FindOpenCL.cmake
+++ b/cmake_modules/FindOpenCL.cmake
@@ -33,6 +33,7 @@ find_path(OPENCL_INCLUDE_DIR
        "/usr/local/cuda"
        "/usr/local/streamsdk"
        "/usr"
+        "${CUDA_TOOLKIT_ROOT_DIR}"
    PATH_SUFFIXES "include"
 )

--- a/docs-source/usersguide/application.rst
+++ b/docs-source/usersguide/application.rst
@@ -1121,6 +1121,25 @@ integrator is:
 The parameter is the integration error tolerance (0.001), whose meaning is the
 same as for the Langevin integrator.
+
+Multiple Time Step Integrator
+-----------------------------
+
+The :class:`MTSIntegrator` class implements the rRESPA multiple time step
+algorithm\ :cite:`Tuckerman1992`.  This allows some forces in the system to be evaluated more
+frequently than others.  For details on how to use it, consult the API
+documentation.
+
+aMD Integrator
+--------------
+
+There are three different integrator types that implement variations of the
+aMD\ :cite:`Hamelberg2007` accelerated sampling algorithm: :class:`AMDIntegrator`,
+:class:`AMDForceGroupIntegrator`, and :class:`DualAMDIntegrator`.  They
+perform integration on a modified potential energy surface to allow much faster
+sampling of conformations.  For details on how to use them, consult the API
+documentation.
+
 Temperature Coupling
 ====================

--- a/docs-source/usersguide/references.bib
+++ b/docs-source/usersguide/references.bib
@@ -90,6 +90,17 @@
   type = {Journal Article}
 }
+@article{Hamelberg2007,
+   author={Hamelberg, Donald and de Oliveira, Cesar Augusto F. and McCammon, J. Andrew},
+   title={Sampling of slow diffusive conformational transitions with accelerated molecular dynamics},
+   journal={Journal of Chemical Physics},
+   volume={127},
+   number={15},
+   pages={155102},
+   year={2007},
+   type = {Journal Article}
+}
 @article{Hawkins1995,
   author = {Hawkins, Gregory D. and Cramer, Christopher J. and Truhlar, Donald G.},
   title = {Pairwise solute descreening of solute charges from a dielectric medium},
@@ -440,6 +451,17 @@
   type = {Journal Article}
 }
+@article{Tuckerman1992,
+   author={Tuckerman, M. and Berne, Bruce J. and Martyna, Glenn J.},
+   title={Reversible multiple time scale molecular dynamics},
+   journal = {Journal of Chemical Physics},
+   volume={97},
+   number={3},
+   pages={1990-2001},
+   year={1992},
+   type = {Journal Article}
+}
 @article{Uberuaga2004,
  author =   {Blas P. Uberuaga and Marian Anghel and Arthur
                  F. Voter},

--- a/libraries/pthreads/include/pthread.h
+++ b/libraries/pthreads/include/pthread.h
--- a/libraries/pthreads/include/sched.h
+++ b/libraries/pthreads/include/sched.h
-/*
- * Module: sched.h
- *
- * Purpose:
- *      Provides an implementation of POSIX realtime extensions
- *      as defined in 
- *
- *              POSIX 1003.1b-1993      (POSIX.1b)
- *
- * --------------------------------------------------------------------------
- *
- *      Pthreads-win32 - POSIX Threads Library for Win32
- *      Copyright(C) 1998 John E. Bossom
- *      Copyright(C) 1999,2005 Pthreads-win32 contributors
- * 
- *      Contact Email: rpj@callisto.canberra.edu.au
- * 
- *      The current list of contributors is contained
- *      in the file CONTRIBUTORS included with the source
- *      code distribution. The list can also be seen at the
- *      following World Wide Web location:
- *      http://sources.redhat.com/pthreads-win32/contributors.html
- * 
- *      This library is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU Lesser General Public
- *      License as published by the Free Software Foundation; either
- *      version 2 of the License, or (at your option) any later version.
- * 
- *      This library is distributed in the hope that it will be useful,
- *      but WITHOUT ANY WARRANTY; without even the implied warranty of
- *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *      Lesser General Public License for more details.
- * 
- *      You should have received a copy of the GNU Lesser General Public
- *      License along with this library in the file COPYING.LIB;
- *      if not, write to the Free Software Foundation, Inc.,
- *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-#ifndef _SCHED_H
-#define _SCHED_H
-#undef PTW32_LEVEL
-#if defined(_POSIX_SOURCE)
-#define PTW32_LEVEL 0
-/* Early POSIX */
-#endif
-#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309
-#undef PTW32_LEVEL
-#define PTW32_LEVEL 1
-/* Include 1b, 1c and 1d */
-#endif
-#if defined(INCLUDE_NP)
-#undef PTW32_LEVEL
-#define PTW32_LEVEL 2
-/* Include Non-Portable extensions */
-#endif
-#define PTW32_LEVEL_MAX 3
-#if !defined(PTW32_LEVEL)
-#define PTW32_LEVEL PTW32_LEVEL_MAX
-/* Include everything */
-#endif
-#if __GNUC__ && ! defined (__declspec)
-# error Please upgrade your GNU compiler to one that supports __declspec.
-#endif
-/*
- * When building the DLL code, you should define PTW32_BUILD so that
- * the variables/functions are exported correctly. When using the DLL,
- * do NOT define PTW32_BUILD, and then the variables/functions will
- * be imported correctly.
- */
-#ifndef PTW32_STATIC_LIB
-#  ifdef PTW32_BUILD
-#    define PTW32_DLLPORT __declspec (dllexport)
-#  else
-#    define PTW32_DLLPORT __declspec (dllimport)
-#  endif
-#else
-#  define PTW32_DLLPORT
-#endif
-/*
- * This is a duplicate of what is in the autoconf config.h,
- * which is only used when building the pthread-win32 libraries.
- */
-#ifndef PTW32_CONFIG_H
-#  if defined(WINCE)
-#    define NEED_ERRNO
-#    define NEED_SEM
-#  endif
-#  if defined(_UWIN) || defined(__MINGW32__)
-#    define HAVE_MODE_T
-#  endif
-#endif
-/*
- *
- */
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-#ifdef NEED_ERRNO
-#include "need_errno.h"
-#else
-#include <errno.h>
-#endif
-#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-#if defined(__MINGW32__) || defined(_UWIN)
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-/* For pid_t */
-#  include <sys/types.h>
-/* Required by Unix 98 */
-#  include <time.h>
-#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-#else
-typedef int pid_t;
-#endif
-/* Thread scheduling policies */
-enum {
-  SCHED_OTHER = 0,
-  SCHED_FIFO,
-  SCHED_RR,
-  SCHED_MIN   = SCHED_OTHER,
-  SCHED_MAX   = SCHED_RR
-};
-struct sched_param {
-  int sched_priority;
-};
-#ifdef __cplusplus
-extern "C"
-{
-#endif                          /* __cplusplus */
-PTW32_DLLPORT int __cdecl sched_yield (void);
-PTW32_DLLPORT int __cdecl sched_get_priority_min (int policy);
-PTW32_DLLPORT int __cdecl sched_get_priority_max (int policy);
-PTW32_DLLPORT int __cdecl sched_setscheduler (pid_t pid, int policy);
-PTW32_DLLPORT int __cdecl sched_getscheduler (pid_t pid);
-/*
- * Note that this macro returns ENOTSUP rather than
- * ENOSYS as might be expected. However, returning ENOSYS
- * should mean that sched_get_priority_{min,max} are
- * not implemented as well as sched_rr_get_interval.
- * This is not the case, since we just don't support
- * round-robin scheduling. Therefore I have chosen to
- * return the same value as sched_setscheduler when
- * SCHED_RR is passed to it.
- */
-#define sched_rr_get_interval(_pid, _interval) \
-  ( errno = ENOTSUP, (int) -1 )
-#ifdef __cplusplus
-}                               /* End of extern "C" */
-#endif                          /* __cplusplus */
-#undef PTW32_LEVEL
-#undef PTW32_LEVEL_MAX
-#endif                          /* !_SCHED_H */
--- a/libraries/pthreads/include/semaphore.h
+++ b/libraries/pthreads/include/semaphore.h
-/*
- * Module: semaphore.h
- *
- * Purpose:
- *	Semaphores aren't actually part of the PThreads standard.
- *	They are defined by the POSIX Standard:
- *
- *		POSIX 1003.1b-1993	(POSIX.1b)
- *
- * --------------------------------------------------------------------------
- *
- *      Pthreads-win32 - POSIX Threads Library for Win32
- *      Copyright(C) 1998 John E. Bossom
- *      Copyright(C) 1999,2005 Pthreads-win32 contributors
- * 
- *      Contact Email: rpj@callisto.canberra.edu.au
- * 
- *      The current list of contributors is contained
- *      in the file CONTRIBUTORS included with the source
- *      code distribution. The list can also be seen at the
- *      following World Wide Web location:
- *      http://sources.redhat.com/pthreads-win32/contributors.html
- * 
- *      This library is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU Lesser General Public
- *      License as published by the Free Software Foundation; either
- *      version 2 of the License, or (at your option) any later version.
- * 
- *      This library is distributed in the hope that it will be useful,
- *      but WITHOUT ANY WARRANTY; without even the implied warranty of
- *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *      Lesser General Public License for more details.
- * 
- *      You should have received a copy of the GNU Lesser General Public
- *      License along with this library in the file COPYING.LIB;
- *      if not, write to the Free Software Foundation, Inc.,
- *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-#if !defined( SEMAPHORE_H )
-#define SEMAPHORE_H
-#undef PTW32_LEVEL
-#if defined(_POSIX_SOURCE)
-#define PTW32_LEVEL 0
-/* Early POSIX */
-#endif
-#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309
-#undef PTW32_LEVEL
-#define PTW32_LEVEL 1
-/* Include 1b, 1c and 1d */
-#endif
-#if defined(INCLUDE_NP)
-#undef PTW32_LEVEL
-#define PTW32_LEVEL 2
-/* Include Non-Portable extensions */
-#endif
-#define PTW32_LEVEL_MAX 3
-#if !defined(PTW32_LEVEL)
-#define PTW32_LEVEL PTW32_LEVEL_MAX
-/* Include everything */
-#endif
-#if __GNUC__ && ! defined (__declspec)
-# error Please upgrade your GNU compiler to one that supports __declspec.
-#endif
-/*
- * When building the DLL code, you should define PTW32_BUILD so that
- * the variables/functions are exported correctly. When using the DLL,
- * do NOT define PTW32_BUILD, and then the variables/functions will
- * be imported correctly.
- */
-#ifndef PTW32_STATIC_LIB
-#  ifdef PTW32_BUILD
-#    define PTW32_DLLPORT __declspec (dllexport)
-#  else
-#    define PTW32_DLLPORT __declspec (dllimport)
-#  endif
-#else
-#  define PTW32_DLLPORT
-#endif
-/*
- * This is a duplicate of what is in the autoconf config.h,
- * which is only used when building the pthread-win32 libraries.
- */
-#ifndef PTW32_CONFIG_H
-#  if defined(WINCE)
-#    define NEED_ERRNO
-#    define NEED_SEM
-#  endif
-#  if defined(_UWIN) || defined(__MINGW32__)
-#    define HAVE_MODE_T
-#  endif
-#endif
-/*
- *
- */
-#if PTW32_LEVEL >= PTW32_LEVEL_MAX
-#ifdef NEED_ERRNO
-#include "need_errno.h"
-#else
-#include <errno.h>
-#endif
-#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
-#define _POSIX_SEMAPHORES
-#ifdef __cplusplus
-extern "C"
-{
-#endif				/* __cplusplus */
-#ifndef HAVE_MODE_T
-typedef unsigned int mode_t;
-#endif
-typedef struct sem_t_ * sem_t;
-PTW32_DLLPORT int __cdecl sem_init (sem_t * sem,
-			    int pshared,
-			    unsigned int value);
-PTW32_DLLPORT int __cdecl sem_destroy (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_trywait (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_wait (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_timedwait (sem_t * sem,
-				 const struct timespec * abstime);
-PTW32_DLLPORT int __cdecl sem_post (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_post_multiple (sem_t * sem,
-				     int count);
-PTW32_DLLPORT int __cdecl sem_open (const char * name,
-			    int oflag,
-			    mode_t mode,
-			    unsigned int value);
-PTW32_DLLPORT int __cdecl sem_close (sem_t * sem);
-PTW32_DLLPORT int __cdecl sem_unlink (const char * name);
-PTW32_DLLPORT int __cdecl sem_getvalue (sem_t * sem,
-				int * sval);
-#ifdef __cplusplus
-}				/* End of extern "C" */
-#endif				/* __cplusplus */
-#undef PTW32_LEVEL
-#undef PTW32_LEVEL_MAX
-#endif				/* !SEMAPHORE_H */
--- a/libraries/pthreads/lib/pthreadVC2.dll
+++ b/libraries/pthreads/lib/pthreadVC2.dll
--- a/libraries/pthreads/lib/pthreadVC2.lib
+++ b/libraries/pthreads/lib/pthreadVC2.lib
--- a/libraries/pthreads/lib/pthreadVC2_static_mt.lib
+++ b/libraries/pthreads/lib/pthreadVC2_static_mt.lib
--- a/libraries/pthreads/lib/pthreadVC2_x64.dll
+++ b/libraries/pthreads/lib/pthreadVC2_x64.dll
--- a/libraries/pthreads/lib/pthreadVC2_x64.lib
+++ b/libraries/pthreads/lib/pthreadVC2_x64.lib
--- a/libraries/pthreads/src/pthread.cpp
+++ b/libraries/pthreads/src/pthread.cpp
+#include "pthread.h"
+PTHREAD_EXPORT volatile long _pthread_cancelling;
+PTHREAD_EXPORT int _pthread_concur;
+/* Will default to zero as needed */
+PTHREAD_EXPORT pthread_once_t _pthread_tls_once;
+PTHREAD_EXPORT DWORD _pthread_tls;
+/* Note initializer is zero, so this works */
+PTHREAD_EXPORT pthread_rwlock_t _pthread_key_lock;
+PTHREAD_EXPORT long _pthread_key_max;
+PTHREAD_EXPORT long _pthread_key_sch;
+PTHREAD_EXPORT void (**_pthread_key_dest)(void *);
--- a/platforms/cpu/include/CpuNonbondedForce.h
+++ b/platforms/cpu/include/CpuNonbondedForce.h
-/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -174,8 +174,8 @@ protected:
        float alphaEwald;
        int numRx, numRy, numRz;
        int meshDim[3];
-        std::vector<float> ewaldScaleTable;
+        std::vector<float> erfcTable, ewaldScaleTable;
-        float ewaldDX, ewaldDXInv;
+        float ewaldDX, ewaldDXInv, erfcDXInv;
        std::vector<double> threadEnergy;
        // The following variables are used to make information accessible to the individual threads.
        int numberOfAtoms;
@@ -241,7 +241,7 @@ protected:
      /**
       * Compute a fast approximation to erfc(x).
       */
-      static float erfcApprox(float x);
+      float erfcApprox(float x);
 };
 } // namespace OpenMM

--- a/platforms/cpu/include/CpuNonbondedForceVec4.h
+++ b/platforms/cpu/include/CpuNonbondedForceVec4.h
-/* Portions copyright (c) 2006-2014 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -87,7 +87,7 @@ protected:
      /**
       * Compute a fast approximation to erfc(x).
       */
-      static fvec4 erfcApprox(const fvec4& x);
+      fvec4 erfcApprox(const fvec4& x);
      /**
       * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)

--- a/platforms/cpu/include/CpuNonbondedForceVec8.h
+++ b/platforms/cpu/include/CpuNonbondedForceVec8.h
-/* Portions copyright (c) 2006-2014 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -86,7 +86,7 @@ protected:
      /**
       * Compute a fast approximation to erfc(x).
       */
-      static fvec8 erfcApprox(const fvec8& x);
+      fvec8 erfcApprox(const fvec8& x);
      /**
       * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)

--- a/platforms/cpu/src/CpuNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuNonbondedForce.cpp
@@ -171,17 +171,20 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
  }
-void CpuNonbondedForce::tabulateEwaldScaleFactor() {
+  void CpuNonbondedForce::tabulateEwaldScaleFactor() {
    if (tableIsValid)
        return;
    tableIsValid = true;
    ewaldDX = cutoffDistance/NUM_TABLE_POINTS;
    ewaldDXInv = 1.0f/ewaldDX;
+    erfcDXInv = 1.0f/(ewaldDX*alphaEwald);
+    erfcTable.resize(NUM_TABLE_POINTS+4);
    ewaldScaleTable.resize(NUM_TABLE_POINTS+4);
    for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
        double r = i*ewaldDX;
        double alphaR = alphaEwald*r;
-        ewaldScaleTable[i] = erfc(alphaR) + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
+        erfcTable[i] = erfc(alphaR);
+        ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
    }
 }
@@ -473,14 +476,10 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
 }
 float CpuNonbondedForce::erfcApprox(float x) {
-    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
+    float x1 = x*erfcDXInv;
-    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
+    int index = min((int) floor(x1), NUM_TABLE_POINTS);
-    // error of 3e-7.
+    float coeff2 = x1-index;
+    float coeff1 = 1.0f-coeff2;
-    float t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x;
+    return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
-    t *= t;
-    t *= t;
-    t *= t;
-    return 1.0f/(t*t);
 }
--- a/platforms/cpu/src/CpuNonbondedForceVec4.cpp
+++ b/platforms/cpu/src/CpuNonbondedForceVec4.cpp
-/* Portions copyright (c) 2006-2014 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -393,15 +393,16 @@ void CpuNonbondedForceVec4::getDeltaR(const fvec4& posI, const fvec4& x, const f
 }
 fvec4 CpuNonbondedForceVec4::erfcApprox(const fvec4& x) {
-    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
+    fvec4 x1 = x*erfcDXInv;
-    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
+    ivec4 index = min(floor(x1), NUM_TABLE_POINTS);
-    // error of 3e-7.
+    fvec4 coeff2 = x1-index;
+    fvec4 coeff1 = 1.0f-coeff2;
-    fvec4 t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x;
+    fvec4 t1(&erfcTable[index[0]]);
-    t *= t;
+    fvec4 t2(&erfcTable[index[1]]);
-    t *= t;
+    fvec4 t3(&erfcTable[index[2]]);
-    t *= t;
+    fvec4 t4(&erfcTable[index[3]]);
-    return 1.0f/(t*t);
+    transpose(t1, t2, t3, t4);
+    return coeff1*t1 + coeff2*t2;
 }
 fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {

--- a/platforms/cpu/src/CpuNonbondedForceVec8.cpp
+++ b/platforms/cpu/src/CpuNonbondedForceVec8.cpp
-/* Portions copyright (c) 2006-2014 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -423,15 +423,23 @@ void CpuNonbondedForceVec8::getDeltaR(const fvec4& posI, const fvec8& x, const f
 }
 fvec8 CpuNonbondedForceVec8::erfcApprox(const fvec8& x) {
-    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
+    fvec8 x1 = x*erfcDXInv;
-    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
+    ivec8 index = min(floor(x1), NUM_TABLE_POINTS);
-    // error of 3e-7.
+    fvec8 coeff2 = x1-index;
+    fvec8 coeff1 = 1.0f-coeff2;
-    fvec8 t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*x)*x)*x)*x)*x)*x;
+    ivec4 indexLower = index.lowerVec();
-    t *= t;
+    ivec4 indexUpper = index.upperVec();
-    t *= t;
+    fvec4 t1(&erfcTable[indexLower[0]]);
-    t *= t;
+    fvec4 t2(&erfcTable[indexLower[1]]);
-    return 1.0f/(t*t);
+    fvec4 t3(&erfcTable[indexLower[2]]);
+    fvec4 t4(&erfcTable[indexLower[3]]);
+    fvec4 t5(&erfcTable[indexUpper[0]]);
+    fvec4 t6(&erfcTable[indexUpper[1]]);
+    fvec4 t7(&erfcTable[indexUpper[2]]);
+    fvec4 t8(&erfcTable[indexUpper[3]]);
+    fvec8 s1, s2, s3, s4;
+    transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
+    return coeff1*s1 + coeff2*s2;
 }
 fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {

--- a/platforms/cpu/src/gmx_atomic.h
+++ b/platforms/cpu/src/gmx_atomic.h
@@ -64,6 +64,7 @@
 #include <stdio.h>
+#define NOMINMAX
 #include <pthread.h>
 #ifdef __cplusplus