Merge branch 'master' of github.com:SimTk/openmm

Conflicts: wrappers/python/simtk/openmm/app/modeller.py

Merge branch 'master' of github.com:SimTk/openmm
Conflicts: wrappers/python/simtk/openmm/app/modeller.py
d58cc041 · Lee-Ping Wang · d7b3a3c2 · ce8d6a2d · d58cc041 · d58cc041
Commit d58cc041 authored Aug 09, 2013 by Lee-Ping Wang
20 changed files
--- a/docs/UsersGuide/References.Data/rdb/pdf_index.MYD
+++ b/docs/UsersGuide/References.Data/rdb/pdf_index.MYD
--- a/docs/UsersGuide/References.Data/rdb/pdf_index.MYI
+++ b/docs/UsersGuide/References.Data/rdb/pdf_index.MYI
--- a/docs/UsersGuide/References.Data/rdb/refs.MYD
+++ b/docs/UsersGuide/References.Data/rdb/refs.MYD
--- a/docs/UsersGuide/References.Data/rdb/refs.MYI
+++ b/docs/UsersGuide/References.Data/rdb/refs.MYI
--- a/docs/UsersGuide/References.Data/rdb/refs_ext.MYD
+++ b/docs/UsersGuide/References.Data/rdb/refs_ext.MYD
--- a/docs/UsersGuide/References.Data/rdb/refs_ext.MYI
+++ b/docs/UsersGuide/References.Data/rdb/refs_ext.MYI
--- a/docs/UsersGuide/References.Data/rdb/terms.MYD
+++ b/docs/UsersGuide/References.Data/rdb/terms.MYD
--- a/docs/UsersGuide/References.Data/rdb/terms.MYI
+++ b/docs/UsersGuide/References.Data/rdb/terms.MYI
--- a/libraries/csha1/include/SHA1.h
+++ b/libraries/csha1/include/SHA1.h
+/*
+  100% free public domain implementation of the SHA-1 algorithm
+  by Dominik Reichl <dominik.reichl@t-online.de>
+  Web: http://www.dominik-reichl.de/
+  Version 2.1 - 2012-06-19
+  - Deconstructor (resetting internal variables) is now only
+    implemented if SHA1_WIPE_VARIABLES is defined (which is the
+    default).
+  - Renamed inclusion guard to contain a GUID.
+  - Demo application is now using C++/STL objects and functions.
+  - Unicode build of the demo application now outputs the hashes of both
+    the ANSI and Unicode representations of strings.
+  - Various other demo application improvements.
+  Version 2.0 - 2012-06-14
+  - Added 'limits.h' include.
+  - Renamed inclusion guard and macros for compliancy (names beginning
+    with an underscore are reserved).
+  Version 1.9 - 2011-11-10
+  - Added Unicode test vectors.
+  - Improved support for hashing files using the HashFile method that
+    are larger than 4 GB.
+  - Improved file hashing performance (by using a larger buffer).
+  - Disabled unnecessary compiler warnings.
+  - Internal variables are now private.
+  Version 1.8 - 2009-03-16
+  - Converted project files to Visual Studio 2008 format.
+  - Added Unicode support for HashFile utility method.
+  - Added support for hashing files using the HashFile method that are
+    larger than 2 GB.
+  - HashFile now returns an error code instead of copying an error
+    message into the output buffer.
+  - GetHash now returns an error code and validates the input parameter.
+  - Added ReportHashStl STL utility method.
+  - Added REPORT_HEX_SHORT reporting mode.
+  - Improved Linux compatibility of test program.
+  Version 1.7 - 2006-12-21
+  - Fixed buffer underrun warning that appeared when compiling with
+    Borland C Builder (thanks to Rex Bloom and Tim Gallagher for the
+    patch).
+  - Breaking change: ReportHash writes the final hash to the start
+    of the buffer, i.e. it's not appending it to the string anymore.
+  - Made some function parameters const.
+  - Added Visual Studio 2005 project files to demo project.
+  Version 1.6 - 2005-02-07 (thanks to Howard Kapustein for patches)
+  - You can set the endianness in your files, no need to modify the
+    header file of the CSHA1 class anymore.
+  - Aligned data support.
+  - Made support/compilation of the utility functions (ReportHash and
+    HashFile) optional (useful when bytes count, for example in embedded
+    environments).
+  Version 1.5 - 2005-01-01
+  - 64-bit compiler compatibility added.
+  - Made variable wiping optional (define SHA1_WIPE_VARIABLES).
+  - Removed unnecessary variable initializations.
+  - ROL32 improvement for the Microsoft compiler (using _rotl).
+  Version 1.4 - 2004-07-22
+  - CSHA1 now compiles fine with GCC 3.3 under Mac OS X (thanks to Larry
+    Hastings).
+  Version 1.3 - 2003-08-17
+  - Fixed a small memory bug and made a buffer array a class member to
+    ensure correct working when using multiple CSHA1 class instances at
+    one time.
+  Version 1.2 - 2002-11-16
+  - Borlands C++ compiler seems to have problems with string addition
+    using sprintf. Fixed the bug which caused the digest report function
+    not to work properly. CSHA1 is now Borland compatible.
+  Version 1.1 - 2002-10-11
+  - Removed two unnecessary header file includes and changed BOOL to
+    bool. Fixed some minor bugs in the web page contents.
+  Version 1.0 - 2002-06-20
+  - First official release.
+  ================ Test Vectors ================
+  SHA1("abc" in ANSI) =
+    A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+  SHA1("abc" in Unicode LE) =
+    9F04F41A 84851416 2050E3D6 8C1A7ABB 441DC2B5
+  SHA1("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+    in ANSI) =
+    84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+  SHA1("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+    in Unicode LE) =
+    51D7D876 9AC72C40 9C5B0E3F 69C60ADC 9A039014
+  SHA1(A million repetitions of "a" in ANSI) =
+    34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+  SHA1(A million repetitions of "a" in Unicode LE) =
+    C4609560 A108A0C6 26AA7F2B 38A65566 739353C5
+*/
+#ifndef SHA1_H_A545E61D43E9404E8D736869AB3CBFE7
+#define SHA1_H_A545E61D43E9404E8D736869AB3CBFE7
+#if !defined(SHA1_UTILITY_FUNCTIONS) && !defined(SHA1_NO_UTILITY_FUNCTIONS)
+#define SHA1_UTILITY_FUNCTIONS
+#endif
+#if !defined(SHA1_STL_FUNCTIONS) && !defined(SHA1_NO_STL_FUNCTIONS)
+#define SHA1_STL_FUNCTIONS
+#if !defined(SHA1_UTILITY_FUNCTIONS)
+#error STL functions require SHA1_UTILITY_FUNCTIONS.
+#endif
+#endif
+#include <memory.h>
+#include <limits.h>
+#ifdef SHA1_UTILITY_FUNCTIONS
+#include <stdio.h>
+#include <string.h>
+#endif
+#ifdef SHA1_STL_FUNCTIONS
+#include <string>
+#endif
+#ifdef _MSC_VER
+#include <stdlib.h>
+#endif
+#include "openmm/internal/windowsExport.h"
+// You can define the endian mode in your files without modifying the SHA-1
+// source files. Just #define SHA1_LITTLE_ENDIAN or #define SHA1_BIG_ENDIAN
+// in your files, before including the SHA1.h header file. If you don't
+// define anything, the class defaults to little endian.
+#if !defined(SHA1_LITTLE_ENDIAN) && !defined(SHA1_BIG_ENDIAN)
+#define SHA1_LITTLE_ENDIAN
+#endif
+// If you want variable wiping, #define SHA1_WIPE_VARIABLES, if not,
+// #define SHA1_NO_WIPE_VARIABLES. If you don't define anything, it
+// defaults to wiping.
+#if !defined(SHA1_WIPE_VARIABLES) && !defined(SHA1_NO_WIPE_VARIABLES)
+#define SHA1_WIPE_VARIABLES
+#endif
+#if defined(SHA1_HAS_TCHAR)
+#include <tchar.h>
+#else
+#ifdef _MSC_VER
+#include <tchar.h>
+#else
+#ifndef TCHAR
+#define TCHAR char
+#endif
+#ifndef _T
+#define _T(__x) (__x)
+#define _tmain main
+#define _tprintf printf
+#define _getts gets
+#define _tcslen strlen
+#define _tfopen fopen
+#define _tcscpy strcpy
+#define _tcscat strcat
+#define _sntprintf snprintf
+#endif
+#endif
+#endif
+///////////////////////////////////////////////////////////////////////////
+// Define variable types
+#ifndef UINT_8
+#ifdef _MSC_VER // Compiling with Microsoft compiler
+#define UINT_8 unsigned __int8
+#else // !_MSC_VER
+#define UINT_8 unsigned char
+#endif // _MSC_VER
+#endif
+#ifndef UINT_32
+#ifdef _MSC_VER // Compiling with Microsoft compiler
+#define UINT_32 unsigned __int32
+#else // !_MSC_VER
+#if (ULONG_MAX == 0xFFFFFFFFUL)
+#define UINT_32 unsigned long
+#else
+#define UINT_32 unsigned int
+#endif
+#endif // _MSC_VER
+#endif // UINT_32
+#ifndef INT_64
+#ifdef _MSC_VER // Compiling with Microsoft compiler
+#define INT_64 __int64
+#else // !_MSC_VER
+#define INT_64 long long
+#endif // _MSC_VER
+#endif // INT_64
+#ifndef UINT_64
+#ifdef _MSC_VER // Compiling with Microsoft compiler
+#define UINT_64 unsigned __int64
+#else // !_MSC_VER
+#define UINT_64 unsigned long long
+#endif // _MSC_VER
+#endif // UINT_64
+///////////////////////////////////////////////////////////////////////////
+// Declare SHA-1 workspace
+typedef union
+{
+	UINT_8 c[64];
+	UINT_32 l[16];
+} SHA1_WORKSPACE_BLOCK;
+class OPENMM_EXPORT CSHA1
+{
+public:
+#ifdef SHA1_UTILITY_FUNCTIONS
+	// Different formats for ReportHash(Stl)
+	enum REPORT_TYPE
+	{
+		REPORT_HEX = 0,
+		REPORT_DIGIT = 1,
+		REPORT_HEX_SHORT = 2
+	};
+#endif
+	// Constructor and destructor
+	CSHA1();
+#ifdef SHA1_WIPE_VARIABLES
+	~CSHA1();
+#endif
+	void Reset();
+	// Hash in binary data and strings
+	void Update(const UINT_8* pbData, UINT_32 uLen);
+#ifdef SHA1_UTILITY_FUNCTIONS
+	// Hash in file contents
+	bool HashFile(const TCHAR* tszFileName);
+#endif
+	// Finalize hash; call it before using ReportHash(Stl)
+	void Final();
+#ifdef SHA1_UTILITY_FUNCTIONS
+	bool ReportHash(TCHAR* tszReport, REPORT_TYPE rtReportType = REPORT_HEX) const;
+#endif
+#ifdef SHA1_STL_FUNCTIONS
+	bool ReportHashStl(std::basic_string<TCHAR>& strOut, REPORT_TYPE rtReportType =
+		REPORT_HEX) const;
+#endif
+	// Get the raw message digest (20 bytes)
+	bool GetHash(UINT_8* pbDest20) const;
+private:
+	// Private SHA-1 transformation
+	void Transform(UINT_32* pState, const UINT_8* pBuffer);
+	// Member variables
+	UINT_32 m_state[5];
+	UINT_32 m_count[2];
+	UINT_32 m_reserved0[1]; // Memory alignment padding
+	UINT_8 m_buffer[64];
+	UINT_8 m_digest[20];
+	UINT_32 m_reserved1[3]; // Memory alignment padding
+	UINT_8 m_workspace[64];
+	SHA1_WORKSPACE_BLOCK* m_block; // SHA1 pointer to the byte array above
+};
+#endif // SHA1_H_A545E61D43E9404E8D736869AB3CBFE7
--- a/libraries/csha1/src/SHA1.cpp
+++ b/libraries/csha1/src/SHA1.cpp
+/*
+  100% free public domain implementation of the SHA-1 algorithm
+  by Dominik Reichl <dominik.reichl@t-online.de>
+  Web: http://www.dominik-reichl.de/
+  See header file for version history and test vectors.
+*/
+// If compiling with MFC, you might want to add #include "StdAfx.h"
+#define _CRT_SECURE_NO_WARNINGS
+#include "SHA1.h"
+#define SHA1_MAX_FILE_BUFFER (32 * 20 * 820)
+// Rotate p_val32 by p_nBits bits to the left
+#ifndef ROL32
+#ifdef _MSC_VER
+#define ROL32(p_val32,p_nBits) _rotl(p_val32,p_nBits)
+#else
+#define ROL32(p_val32,p_nBits) (((p_val32)<<(p_nBits))|((p_val32)>>(32-(p_nBits))))
+#endif
+#endif
+#ifdef SHA1_LITTLE_ENDIAN
+#define SHABLK0(i) (m_block->l[i] = \
+	(ROL32(m_block->l[i],24) & 0xFF00FF00) | (ROL32(m_block->l[i],8) & 0x00FF00FF))
+#else
+#define SHABLK0(i) (m_block->l[i])
+#endif
+#define SHABLK(i) (m_block->l[i&15] = ROL32(m_block->l[(i+13)&15] ^ \
+	m_block->l[(i+8)&15] ^ m_block->l[(i+2)&15] ^ m_block->l[i&15],1))
+// SHA-1 rounds
+#define S_R0(v,w,x,y,z,i) {z+=((w&(x^y))^y)+SHABLK0(i)+0x5A827999+ROL32(v,5);w=ROL32(w,30);}
+#define S_R1(v,w,x,y,z,i) {z+=((w&(x^y))^y)+SHABLK(i)+0x5A827999+ROL32(v,5);w=ROL32(w,30);}
+#define S_R2(v,w,x,y,z,i) {z+=(w^x^y)+SHABLK(i)+0x6ED9EBA1+ROL32(v,5);w=ROL32(w,30);}
+#define S_R3(v,w,x,y,z,i) {z+=(((w|x)&y)|(w&x))+SHABLK(i)+0x8F1BBCDC+ROL32(v,5);w=ROL32(w,30);}
+#define S_R4(v,w,x,y,z,i) {z+=(w^x^y)+SHABLK(i)+0xCA62C1D6+ROL32(v,5);w=ROL32(w,30);}
+#pragma warning(push)
+// Disable compiler warning 'Conditional expression is constant'
+#pragma warning(disable: 4127)
+CSHA1::CSHA1()
+{
+	m_block = (SHA1_WORKSPACE_BLOCK*)m_workspace;
+	Reset();
+}
+#ifdef SHA1_WIPE_VARIABLES
+CSHA1::~CSHA1()
+{
+	Reset();
+}
+#endif
+void CSHA1::Reset()
+{
+	// SHA1 initialization constants
+	m_state[0] = 0x67452301;
+	m_state[1] = 0xEFCDAB89;
+	m_state[2] = 0x98BADCFE;
+	m_state[3] = 0x10325476;
+	m_state[4] = 0xC3D2E1F0;
+	m_count[0] = 0;
+	m_count[1] = 0;
+}
+void CSHA1::Transform(UINT_32* pState, const UINT_8* pBuffer)
+{
+	UINT_32 a = pState[0], b = pState[1], c = pState[2], d = pState[3], e = pState[4];
+	memcpy(m_block, pBuffer, 64);
+	// 4 rounds of 20 operations each, loop unrolled
+	S_R0(a,b,c,d,e, 0); S_R0(e,a,b,c,d, 1); S_R0(d,e,a,b,c, 2); S_R0(c,d,e,a,b, 3);
+	S_R0(b,c,d,e,a, 4); S_R0(a,b,c,d,e, 5); S_R0(e,a,b,c,d, 6); S_R0(d,e,a,b,c, 7);
+	S_R0(c,d,e,a,b, 8); S_R0(b,c,d,e,a, 9); S_R0(a,b,c,d,e,10); S_R0(e,a,b,c,d,11);
+	S_R0(d,e,a,b,c,12); S_R0(c,d,e,a,b,13); S_R0(b,c,d,e,a,14); S_R0(a,b,c,d,e,15);
+	S_R1(e,a,b,c,d,16); S_R1(d,e,a,b,c,17); S_R1(c,d,e,a,b,18); S_R1(b,c,d,e,a,19);
+	S_R2(a,b,c,d,e,20); S_R2(e,a,b,c,d,21); S_R2(d,e,a,b,c,22); S_R2(c,d,e,a,b,23);
+	S_R2(b,c,d,e,a,24); S_R2(a,b,c,d,e,25); S_R2(e,a,b,c,d,26); S_R2(d,e,a,b,c,27);
+	S_R2(c,d,e,a,b,28); S_R2(b,c,d,e,a,29); S_R2(a,b,c,d,e,30); S_R2(e,a,b,c,d,31);
+	S_R2(d,e,a,b,c,32); S_R2(c,d,e,a,b,33); S_R2(b,c,d,e,a,34); S_R2(a,b,c,d,e,35);
+	S_R2(e,a,b,c,d,36); S_R2(d,e,a,b,c,37); S_R2(c,d,e,a,b,38); S_R2(b,c,d,e,a,39);
+	S_R3(a,b,c,d,e,40); S_R3(e,a,b,c,d,41); S_R3(d,e,a,b,c,42); S_R3(c,d,e,a,b,43);
+	S_R3(b,c,d,e,a,44); S_R3(a,b,c,d,e,45); S_R3(e,a,b,c,d,46); S_R3(d,e,a,b,c,47);
+	S_R3(c,d,e,a,b,48); S_R3(b,c,d,e,a,49); S_R3(a,b,c,d,e,50); S_R3(e,a,b,c,d,51);
+	S_R3(d,e,a,b,c,52); S_R3(c,d,e,a,b,53); S_R3(b,c,d,e,a,54); S_R3(a,b,c,d,e,55);
+	S_R3(e,a,b,c,d,56); S_R3(d,e,a,b,c,57); S_R3(c,d,e,a,b,58); S_R3(b,c,d,e,a,59);
+	S_R4(a,b,c,d,e,60); S_R4(e,a,b,c,d,61); S_R4(d,e,a,b,c,62); S_R4(c,d,e,a,b,63);
+	S_R4(b,c,d,e,a,64); S_R4(a,b,c,d,e,65); S_R4(e,a,b,c,d,66); S_R4(d,e,a,b,c,67);
+	S_R4(c,d,e,a,b,68); S_R4(b,c,d,e,a,69); S_R4(a,b,c,d,e,70); S_R4(e,a,b,c,d,71);
+	S_R4(d,e,a,b,c,72); S_R4(c,d,e,a,b,73); S_R4(b,c,d,e,a,74); S_R4(a,b,c,d,e,75);
+	S_R4(e,a,b,c,d,76); S_R4(d,e,a,b,c,77); S_R4(c,d,e,a,b,78); S_R4(b,c,d,e,a,79);
+	// Add the working vars back into state
+	pState[0] += a;
+	pState[1] += b;
+	pState[2] += c;
+	pState[3] += d;
+	pState[4] += e;
+	// Wipe variables
+#ifdef SHA1_WIPE_VARIABLES
+	a = b = c = d = e = 0;
+#endif
+}
+void CSHA1::Update(const UINT_8* pbData, UINT_32 uLen)
+{
+	UINT_32 j = ((m_count[0] >> 3) & 0x3F);
+	if((m_count[0] += (uLen << 3)) < (uLen << 3))
+		++m_count[1]; // Overflow
+	m_count[1] += (uLen >> 29);
+	UINT_32 i;
+	if((j + uLen) > 63)
+	{
+		i = 64 - j;
+		memcpy(&m_buffer[j], pbData, i);
+		Transform(m_state, m_buffer);
+		for( ; (i + 63) < uLen; i += 64)
+			Transform(m_state, &pbData[i]);
+		j = 0;
+	}
+	else i = 0;
+	if((uLen - i) != 0)
+		memcpy(&m_buffer[j], &pbData[i], uLen - i);
+}
+#ifdef SHA1_UTILITY_FUNCTIONS
+bool CSHA1::HashFile(const TCHAR* tszFileName)
+{
+	if(tszFileName == NULL) return false;
+	FILE* fpIn = _tfopen(tszFileName, _T("rb"));
+	if(fpIn == NULL) return false;
+	UINT_8* pbData = new UINT_8[SHA1_MAX_FILE_BUFFER];
+	if(pbData == NULL) { fclose(fpIn); return false; }
+	bool bSuccess = true;
+	while(true)
+	{
+		const size_t uRead = fread(pbData, 1, SHA1_MAX_FILE_BUFFER, fpIn);
+		if(uRead > 0)
+			Update(pbData, static_cast<UINT_32>(uRead));
+		if(uRead < SHA1_MAX_FILE_BUFFER)
+		{
+			if(feof(fpIn) == 0) bSuccess = false;
+			break;
+		}
+	}
+	fclose(fpIn);
+	delete[] pbData;
+	return bSuccess;
+}
+#endif
+void CSHA1::Final()
+{
+	UINT_32 i;
+	UINT_8 pbFinalCount[8];
+	for(i = 0; i < 8; ++i)
+		pbFinalCount[i] = static_cast<UINT_8>((m_count[((i >= 4) ? 0 : 1)] >>
+			((3 - (i & 3)) * 8) ) & 0xFF); // Endian independent
+	Update((UINT_8*)"\200", 1);
+	while((m_count[0] & 504) != 448)
+		Update((UINT_8*)"\0", 1);
+	Update(pbFinalCount, 8); // Cause a Transform()
+	for(i = 0; i < 20; ++i)
+		m_digest[i] = static_cast<UINT_8>((m_state[i >> 2] >> ((3 -
+			(i & 3)) * 8)) & 0xFF);
+	// Wipe variables for security reasons
+#ifdef SHA1_WIPE_VARIABLES
+	memset(m_buffer, 0, 64);
+	memset(m_state, 0, 20);
+	memset(m_count, 0, 8);
+	memset(pbFinalCount, 0, 8);
+	Transform(m_state, m_buffer);
+#endif
+}
+#ifdef SHA1_UTILITY_FUNCTIONS
+bool CSHA1::ReportHash(TCHAR* tszReport, REPORT_TYPE rtReportType) const
+{
+	if(tszReport == NULL) return false;
+	TCHAR tszTemp[16];
+	if((rtReportType == REPORT_HEX) || (rtReportType == REPORT_HEX_SHORT))
+	{
+		_sntprintf(tszTemp, 15, _T("%02X"), m_digest[0]);
+		_tcscpy(tszReport, tszTemp);
+		const TCHAR* lpFmt = ((rtReportType == REPORT_HEX) ? _T(" %02X") : _T("%02X"));
+		for(size_t i = 1; i < 20; ++i)
+		{
+			_sntprintf(tszTemp, 15, lpFmt, m_digest[i]);
+			_tcscat(tszReport, tszTemp);
+		}
+	}
+	else if(rtReportType == REPORT_DIGIT)
+	{
+		_sntprintf(tszTemp, 15, _T("%u"), m_digest[0]);
+		_tcscpy(tszReport, tszTemp);
+		for(size_t i = 1; i < 20; ++i)
+		{
+			_sntprintf(tszTemp, 15, _T(" %u"), m_digest[i]);
+			_tcscat(tszReport, tszTemp);
+		}
+	}
+	else return false;
+	return true;
+}
+#endif
+#ifdef SHA1_STL_FUNCTIONS
+bool CSHA1::ReportHashStl(std::basic_string<TCHAR>& strOut, REPORT_TYPE rtReportType) const
+{
+	TCHAR tszOut[84];
+	const bool bResult = ReportHash(tszOut, rtReportType);
+	if(bResult) strOut = tszOut;
+	return bResult;
+}
+#endif
+bool CSHA1::GetHash(UINT_8* pbDest20) const
+{
+	if(pbDest20 == NULL) return false;
+	memcpy(pbDest20, m_digest, 20);
+	return true;
+}
+#pragma warning(pop)
\ No newline at end of file
--- a/olla/src/Platform.cpp
+++ b/olla/src/Platform.cpp
@@ -279,8 +279,12 @@ const string& Platform::getDefaultPluginsDirectory() {
    return directory;
 }
+// Some bizarre preprocessor magic required to convert a macro to a string...
+#define STRING1(x) #x
+#define STRING(x) STRING1(x)
 const string& Platform::getOpenMMVersion() {
-    static const string version = "5.1";
+    static const string version = STRING(OPENMM_MAJOR_VERSION) "." STRING(OPENMM_MINOR_VERSION);
    return version;
 }

--- a/openmmapi/include/openmm/MonteCarloAnisotropicBarostat.h
+++ b/openmmapi/include/openmm/MonteCarloAnisotropicBarostat.h
@@ -86,12 +86,12 @@ public:
     *
     * @param defaultPressure   The default pressure acting on each axis (in bar)
     * @param temperature       the temperature at which the system is being maintained (in Kelvin)
-     * @param frequency         the frequency at which Monte Carlo pressure changes should be attempted (in time steps)
     * @param scaleX            whether to allow the X dimension of the periodic box to change size
     * @param scaleY            whether to allow the Y dimension of the periodic box to change size
     * @param scaleZ            whether to allow the Z dimension of the periodic box to change size
+     * @param frequency         the frequency at which Monte Carlo pressure changes should be attempted (in time steps)
     */
-    MonteCarloAnisotropicBarostat(const Vec3& defaultPressure, double temperature, int frequency = 25, bool scaleX = 1, bool scaleY = 1, bool scaleZ = 1);
+    MonteCarloAnisotropicBarostat(const Vec3& defaultPressure, double temperature, bool scaleX = true, bool scaleY = true, bool scaleZ = true, int frequency = 25);
    /**
     * Get the default pressure (in bar).
     *

--- a/openmmapi/include/openmm/internal/ContextImpl.h
+++ b/openmmapi/include/openmm/internal/ContextImpl.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2013 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -250,9 +250,13 @@ public:
    void integratorDeleted() {
        integratorIsDeleted = true;
    }
+    /**
+     * This is the routine that actually computes the list of molecules returned by getMolecules().  Normally
+     * you should never call it.  It is exposed here because the same logic is useful to other classes too.
+     */
+    static std::vector<std::vector<int> > findMolecules(int numParticles, std::vector<std::vector<int> >& particleBonds);
 private:
    friend class Context;
-    static void tagParticlesInMolecule(int particle, int molecule, std::vector<int>& particleMolecule, std::vector<std::vector<int> >& particleBonds);
    Context& owner;
    const System& system;
    Integrator& integrator;

--- a/openmmapi/src/ContextImpl.cpp
+++ b/openmmapi/src/ContextImpl.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2013 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -288,26 +288,54 @@ const vector<vector<int> >& ContextImpl::getMolecules() const {
        particleBonds[bonds[i].second].push_back(bonds[i].first);
    }
-    // Now tag particles by which molecule they belong to.
+    // Now identify particles by which molecule they belong to.
+    molecules = findMolecules(numParticles, particleBonds);
+    return molecules;
+}
+vector<vector<int> > ContextImpl::findMolecules(int numParticles, vector<vector<int> >& particleBonds) {
+    // This is essentially a recursive algorithm, but it is reformulated as a loop to avoid
+    // stack overflows.  It selects a particle, marks it as a new molecule, then recursively
+    // marks every particle bonded to it as also being in that molecule.
    vector<int> particleMolecule(numParticles, -1);
    int numMolecules = 0;
    for (int i = 0; i < numParticles; i++)
-        if (particleMolecule[i] == -1)
+        if (particleMolecule[i] == -1) {
-            tagParticlesInMolecule(i, numMolecules++, particleMolecule, particleBonds);
+            // Start a new molecule.
-    molecules.resize(numMolecules);
-    for (int i = 0; i < numParticles; i++)
+            vector<int> particleStack;
-        molecules[particleMolecule[i]].push_back(i);
+            vector<int> neighborStack;
-    return molecules;
+            particleStack.push_back(i);
-}
+            neighborStack.push_back(0);
+            int molecule = numMolecules++;
-void ContextImpl::tagParticlesInMolecule(int particle, int molecule, vector<int>& particleMolecule, vector<vector<int> >& particleBonds) {
+            // Recursively tag all the bonded particles.
-    // Recursively tag particles as belonging to a particular molecule.
+            while (particleStack.size() > 0) {
+                int particle = particleStack.back();
                particleMolecule[particle] = molecule;
-    for (int i = 0; i < (int) particleBonds[particle].size(); i++)
+                int& neighbor = neighborStack.back();
-        if (particleMolecule[particleBonds[particle][i]] == -1)
+                while (neighbor < particleBonds[particle].size() && particleMolecule[particleBonds[particle][neighbor]] != -1)
-            tagParticlesInMolecule(particleBonds[particle][i], molecule, particleMolecule, particleBonds);
+                    neighbor++;
+                if (neighbor < particleBonds[particle].size()) {
+                    particleStack.push_back(particleBonds[particle][neighbor]);
+                    neighborStack.push_back(0);
+                }
+                else {
+                    particleStack.pop_back();
+                    neighborStack.pop_back();
+                }
+            }
+        }
+    // Build the final output vector.
+    vector<vector<int> > molecules(numMolecules);
+    for (int i = 0; i < numParticles; i++)
+        molecules[particleMolecule[i]].push_back(i);
+    return molecules;
 }
 static void writeString(ostream& stream, string str) {

--- a/openmmapi/src/MonteCarloAnisotropicBarostat.cpp
+++ b/openmmapi/src/MonteCarloAnisotropicBarostat.cpp
@@ -35,8 +35,8 @@
 using namespace OpenMM;
-MonteCarloAnisotropicBarostat::MonteCarloAnisotropicBarostat(const Vec3& defaultPressure, double temperature, int frequency, bool scaleX, bool scaleY, bool scaleZ) :
+MonteCarloAnisotropicBarostat::MonteCarloAnisotropicBarostat(const Vec3& defaultPressure, double temperature, bool scaleX, bool scaleY, bool scaleZ, int frequency) :
-        defaultPressure(defaultPressure), temperature(temperature), frequency(frequency), scaleX(scaleX), scaleY(scaleY), scaleZ(scaleZ) {
+        defaultPressure(defaultPressure), temperature(temperature), scaleX(scaleX), scaleY(scaleY), scaleZ(scaleZ), frequency(frequency) {
    setRandomNumberSeed((int) time(NULL));
 }

--- a/platforms/cuda/include/CudaContext.h
+++ b/platforms/cuda/include/CudaContext.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2009-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2013 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -489,7 +489,6 @@ private:
    struct MoleculeGroup;
    class VirtualSiteInfo;
    void findMoleculeGroups();
-    static void tagAtomsInMolecule(int atom, int molecule, std::vector<int>& atomMolecule, std::vector<std::vector<int> >& atomBonds);
    /**
     * Ensure that all molecules marked as "identical" really are identical.  This should be
     * called whenever force field parameters change.  If necessary, it will rebuild the list
@@ -515,7 +514,7 @@ private:
    int numAtomBlocks;
    int numThreadBlocks;
    bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered;
-    std::string compiler, tempDir, gpuArchitecture;
+    std::string compiler, tempDir, cacheDir, gpuArchitecture;
    float4 periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
    double4 periodicBoxSize, invPeriodicBoxSize;
    std::string defaultOptimizationOptions;

--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2009-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2013 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -35,19 +35,25 @@
 #include "CudaIntegrationUtilities.h"
 #include "CudaKernelSources.h"
 #include "CudaNonbondedUtilities.h"
+#include "SHA1.h"
 #include "hilbert.h"
 #include "openmm/OpenMMException.h"
 #include "openmm/Platform.h"
 #include "openmm/System.h"
 #include "openmm/VirtualSite.h"
 #include "CudaExpressionUtilities.h"
+#include "openmm/internal/ContextImpl.h"
 #include <algorithm>
 #include <cstdlib>
 #include <fstream>
+#include <iomanip>
 #include <iostream>
 #include <sstream>
 #include <typeinfo>
 #include <cudaProfiler.h>
+#ifndef WIN32
+  #include <unistd.h>
+#endif
 #define CHECK_RESULT(result) CHECK_RESULT2(result, errorMessage);
@@ -87,10 +93,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
    }
    else
        throw OpenMMException("Illegal value for CudaPrecision: "+precision);
+    char* cacheVariable = getenv("OPENMM_CACHE_DIR");
+    cacheDir = (cacheVariable == NULL ? tempDir : string(cacheVariable));
 #ifdef WIN32
    this->tempDir = tempDir+"\\";
+    cacheDir = cacheDir+"\\";
 #else
    this->tempDir = tempDir+"/";
+    cacheDir = cacheDir+"/";
 #endif
    contextIndex = platformData.contexts.size();
    int numDevices;
@@ -347,6 +357,7 @@ static bool compileInWindows(const string &command) {
 #endif
 CUmodule CudaContext::createModule(const string source, const map<string, string>& defines, const char* optimizationFlags) {
+    string bits = intToString(8*sizeof(void*));
    string options = (optimizationFlags == NULL ? defaultOptimizationOptions : string(optimizationFlags));
    stringstream src;
    if (!options.empty())
@@ -394,17 +405,38 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
        src << endl;
    src << source << endl;
+    // See whether we already have PTX for this kernel cached.
+    CSHA1 sha1;
+    sha1.Update((const UINT_8*) src.str().c_str(), src.str().size());
+    sha1.Final();
+    UINT_8 hash[20];
+    sha1.GetHash(hash);
+    stringstream cacheFile;
+    cacheFile << cacheDir;
+    cacheFile.flags(ios::hex);
+    for (int i = 0; i < 20; i++)
+        cacheFile << setw(2) << setfill('0') << (int) hash[i];
+    cacheFile << '_' << gpuArchitecture << '_' << bits;
+    CUmodule module;
+    if (cuModuleLoad(&module, cacheFile.str().c_str()) == CUDA_SUCCESS)
+        return module;
    // Write out the source to a temporary file.
    stringstream tempFileName;
    tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions.
+#ifdef WIN32
+    tempFileName << "_" << GetCurrentProcessId();
+#else
+    tempFileName << "_" << getpid();
+#endif
    string inputFile = (tempDir+tempFileName.str()+".cu");
    string outputFile = (tempDir+tempFileName.str()+".ptx");
    string logFile = (tempDir+tempFileName.str()+".log");
    ofstream out(inputFile.c_str());
    out << src.str();
    out.close();
-    string bits = intToString(8*sizeof(void*));
 #ifdef WIN32
 #ifdef _DEBUG
    string command = "\""+compiler+"\" --ptx -G -g --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
@@ -433,7 +465,6 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
            }
            throw OpenMMException(error.str());
        }
-        CUmodule module;
        CUresult result = cuModuleLoad(&module, outputFile.c_str());
        if (result != CUDA_SUCCESS) {
            std::stringstream m;
@@ -441,6 +472,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
            throw OpenMMException(m.str());
        }
        remove(inputFile.c_str());
+        if (rename(outputFile.c_str(), cacheFile.str().c_str()) != 0)
            remove(outputFile.c_str());
        remove(logFile.c_str());
        return module;
@@ -616,15 +648,6 @@ void CudaContext::clearAutoclearBuffers() {
    }
 }
-void CudaContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds) {
-    // Recursively tag atoms as belonging to a particular molecule.
-    atomMolecule[atom] = molecule;
-    for (int i = 0; i < (int) atomBonds[atom].size(); i++)
-        if (atomMolecule[atomBonds[atom][i]] == -1)
-            tagAtomsInMolecule(atomBonds[atom][i], molecule, atomMolecule, atomBonds);
-}
 /**
 * This class ensures that atom reordering doesn't break virtual sites.
 */
@@ -719,16 +742,14 @@ void CudaContext::findMoleculeGroups() {
            }
        }
-        // Now tag atoms by which molecule they belong to.
+        // Now identify atoms by which molecule they belong to.
-        vector<int> atomMolecule(numAtoms, -1);
+        vector<vector<int> > atomIndices = ContextImpl::findMolecules(numAtoms, atomBonds);
-        int numMolecules = 0;
+        int numMolecules = atomIndices.size();
-        for (int i = 0; i < numAtoms; i++)
+        vector<int> atomMolecule(numAtoms);
-            if (atomMolecule[i] == -1)
+        for (int i = 0; i < (int) atomIndices.size(); i++)
-                tagAtomsInMolecule(i, numMolecules++, atomMolecule, atomBonds);
+            for (int j = 0; j < (int) atomIndices[i].size(); j++)
-        vector<vector<int> > atomIndices(numMolecules);
+                atomMolecule[atomIndices[i][j]] = i;
-        for (int i = 0; i < numAtoms; i++)
-            atomIndices[atomMolecule[i]].push_back(i);
        // Construct a description of each molecule.

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -388,7 +388,6 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
    if (!hasInitializedKernel) {
        hasInitializedKernel = true;
        map<string, string> defines;
-        defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
        CUmodule module = cu.createModule(CudaKernelSources::constraints, defines);
        applyDeltasKernel = cu.getKernel(module, "applyPositionDeltas");
    }
@@ -396,7 +395,8 @@ void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
    cu.clearBuffer(integration.getPosDelta());
    integration.applyConstraints(tol);
    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getIntegrationUtilities().getPosDelta().getDevicePointer()};
+    int numAtoms = cu.getNumAtoms();
+    void* args[] = {&numAtoms, &cu.getPosq().getDevicePointer(), &posCorrection, &cu.getIntegrationUtilities().getPosDelta().getDevicePointer()};
    cu.executeKernel(applyDeltasKernel, args, cu.getNumAtoms());
    integration.computeVirtualSites();
 }
@@ -4156,8 +4156,6 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle
    cu.getPlatformData().initializeContexts(system);
    cu.setAsCurrent();
    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
    CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
    kernel1 = cu.getKernel(module, "integrateVerletPart1");
    kernel2 = cu.getKernel(module, "integrateVerletPart2");
@@ -4168,6 +4166,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();
+    int paddedNumAtoms = cu.getPaddedNumAtoms();
    double dt = integrator.getStepSize();
    if (dt != prevStepSize) {
        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
@@ -4186,7 +4185,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
    // Call the first integration kernel.
    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args1[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
+    void* args1[] = {&numAtoms, &paddedNumAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
            &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
    cu.executeKernel(kernel1, args1, numAtoms);
@@ -4196,7 +4195,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
    // Call the second integration kernel.
-    void* args2[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
+    void* args2[] = {&numAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
            &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
    cu.executeKernel(kernel2, args2, numAtoms);
    integration.computeVirtualSites();
@@ -4223,8 +4222,6 @@ void CudaIntegrateLangevinStepKernel::initialize(const System& system, const Lan
    cu.setAsCurrent();
    cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
    CUmodule module = cu.createModule(CudaKernelSources::langevin, defines, "");
    kernel1 = cu.getKernel(module, "integrateLangevinPart1");
    kernel2 = cu.getKernel(module, "integrateLangevinPart2");
@@ -4236,6 +4233,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();
+    int paddedNumAtoms = cu.getPaddedNumAtoms();
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
@@ -4273,7 +4271,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
    // Call the first integration kernel.
    int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
-    void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
+    void* args1[] = {&numAtoms, &paddedNumAtoms, &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
            &params->getDevicePointer(), &integration.getStepSize().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
    cu.executeKernel(kernel1, args1, numAtoms);
@@ -4284,7 +4282,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
    // Call the second integration kernel.
    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args2[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
+    void* args2[] = {&numAtoms, &cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
            &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
    cu.executeKernel(kernel2, args2, numAtoms);
    integration.computeVirtualSites();
@@ -4308,8 +4306,6 @@ void CudaIntegrateBrownianStepKernel::initialize(const System& system, const Bro
    cu.setAsCurrent();
    cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
    CUmodule module = cu.createModule(CudaKernelSources::brownian, defines, "");
    kernel1 = cu.getKernel(module, "integrateBrownianPart1");
    kernel2 = cu.getKernel(module, "integrateBrownianPart2");
@@ -4320,6 +4316,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();
+    int paddedNumAtoms = cu.getPaddedNumAtoms();
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
@@ -4334,7 +4331,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
    // Call the first integration kernel.
    int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
-    void* args1[] = {useDouble ? (void*) &tauDt : (void*) &tauDtFloat,
+    void* args1[] = {&numAtoms, &paddedNumAtoms, useDouble ? (void*) &tauDt : (void*) &tauDtFloat,
            useDouble ? (void*) &noise : (void*) &noiseFloat,
            &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
            &cu.getVelm().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
@@ -4347,7 +4344,7 @@ void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const Browni
    // Call the second integration kernel.
    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args2[] = {useDouble ? (void*) &stepSize : (void*) &stepSizeFloat,
+    void* args2[] = {&numAtoms, useDouble ? (void*) &stepSize : (void*) &stepSizeFloat,
            &cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
    cu.executeKernel(kernel2, args2, numAtoms);
    integration.computeVirtualSites();
@@ -4370,8 +4367,6 @@ void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, con
    cu.getPlatformData().initializeContexts(system);
    cu.setAsCurrent();
    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
    CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
    kernel1 = cu.getKernel(module, "integrateVerletPart1");
    kernel2 = cu.getKernel(module, "integrateVerletPart2");
@@ -4383,6 +4378,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();
+    int paddedNumAtoms = cu.getPaddedNumAtoms();
    // Select the step size to use.
@@ -4391,7 +4387,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
    double tol = integrator.getErrorTolerance();
    float tolFloat = (float) tol;
    bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
-    void* argsSelect[] = {useDouble ? (void*) &maxStepSize : (void*) &maxStepSizeFloat,
+    void* argsSelect[] = {&numAtoms, &paddedNumAtoms, useDouble ? (void*) &maxStepSize : (void*) &maxStepSizeFloat,
            useDouble ? (void*) &tol : (void*) &tolFloat,
            &cu.getIntegrationUtilities().getStepSize().getDevicePointer(),
            &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer()};
@@ -4401,7 +4397,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
    // Call the first integration kernel.
    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args1[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
+    void* args1[] = {&numAtoms, &paddedNumAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
            &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
    cu.executeKernel(kernel1, args1, numAtoms);
@@ -4411,7 +4407,7 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
    // Call the second integration kernel.
-    void* args2[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
+    void* args2[] = {&numAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
            &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
    cu.executeKernel(kernel2, args2, numAtoms);
    integration.computeVirtualSites();
@@ -4456,8 +4452,6 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c
    cu.setAsCurrent();
    cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
    CUmodule module = cu.createModule(CudaKernelSources::langevin, defines, "");
    kernel1 = cu.getKernel(module, "integrateLangevinPart1");
    kernel2 = cu.getKernel(module, "integrateLangevinPart2");
@@ -4471,6 +4465,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
    cu.setAsCurrent();
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();
+    int paddedNumAtoms = cu.getPaddedNumAtoms();
    // Select the step size to use.
@@ -4483,7 +4478,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
    double kT = BOLTZ*integrator.getTemperature();
    float kTFloat = (float) kT;
    bool useDouble = cu.getUseDoublePrecision() || cu.getUseMixedPrecision();
-    void* argsSelect[] = {useDouble ? (void*) &maxStepSize : (void*) &maxStepSizeFloat,
+    void* argsSelect[] = {&numAtoms, &paddedNumAtoms, useDouble ? (void*) &maxStepSize : (void*) &maxStepSizeFloat,
            useDouble ? (void*) &tol : (void*) &tolFloat,
            useDouble ? (void*) &tau : (void*) &tauFloat,
            useDouble ? (void*) &kT : (void*) &kTFloat,
@@ -4495,7 +4490,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
    // Call the first integration kernel.
    int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
-    void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
+    void* args1[] = {&numAtoms, &paddedNumAtoms, &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
            &params->getDevicePointer(), &integration.getStepSize().getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex};
    cu.executeKernel(kernel1, args1, numAtoms);
@@ -4506,7 +4501,7 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
    // Call the second integration kernel.
    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args2[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
+    void* args2[] = {&numAtoms, &cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
            &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
    cu.executeKernel(kernel2, args2, numAtoms);
    integration.computeVirtualSites();
@@ -5369,7 +5364,6 @@ void CudaApplyAndersenThermostatKernel::initialize(const System& system, const A
    cu.setAsCurrent();
    randomSeed = thermostat.getRandomNumberSeed();
    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
    CUmodule module = cu.createModule(CudaKernelSources::andersenThermostat, defines);
    kernel = cu.getKernel(module, "applyAndersenThermostat");
    cu.getIntegrationUtilities().initRandomNumberGenerator(randomSeed);
@@ -5391,7 +5385,8 @@ void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
    float frequency = (float) context.getParameter(AndersenThermostat::CollisionFrequency());
    float kT = (float) (BOLTZ*context.getParameter(AndersenThermostat::Temperature()));
    int randomIndex = cu.getIntegrationUtilities().prepareRandomNumbers(cu.getPaddedNumAtoms());
-    void* args[] = {&frequency, &kT, &cu.getVelm().getDevicePointer(), &cu.getIntegrationUtilities().getStepSize().getDevicePointer(),
+    int numAtoms = cu.getNumAtoms();
+    void* args[] = {&numAtoms, &frequency, &kT, &cu.getVelm().getDevicePointer(), &cu.getIntegrationUtilities().getStepSize().getDevicePointer(),
            &cu.getIntegrationUtilities().getRandom().getDevicePointer(), &randomIndex, &atomGroups->getDevicePointer()};
    cu.executeKernel(kernel, args, cu.getNumAtoms());
 }

--- a/platforms/cuda/src/kernels/andersenThermostat.cu
+++ b/platforms/cuda/src/kernels/andersenThermostat.cu
@@ -2,11 +2,11 @@
 * Apply the Andersen thermostat to adjust particle velocities.
 */
-extern "C" __global__ void applyAndersenThermostat(float collisionFrequency, float kT, mixed4* velm, const mixed4* __restrict__ stepSize, const float4* __restrict__ random,
+extern "C" __global__ void applyAndersenThermostat(int numAtoms, float collisionFrequency, float kT, mixed4* velm, const mixed4* __restrict__ stepSize, const float4* __restrict__ random,
        unsigned int randomIndex, const int* __restrict__ atomGroups) {
    float collisionProbability = 1.0f-expf(-(float) (collisionFrequency*stepSize[0].y));
    float randomRange = erff(collisionProbability/sqrtf(2.0f));
-    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
+    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
        mixed4 velocity = velm[index];
        float4 selectRand = random[randomIndex+atomGroups[index]];
        float4 velRand = random[randomIndex+index];

--- a/platforms/cuda/src/kernels/brownian.cu
+++ b/platforms/cuda/src/kernels/brownian.cu
@@ -2,16 +2,16 @@
 * Perform the first step of Brownian integration.
 */
-extern "C" __global__ void integrateBrownianPart1(mixed tauDeltaT, mixed noiseAmplitude, const long long* __restrict__ force,
+extern "C" __global__ void integrateBrownianPart1(int numAtoms, int paddedNumAtoms, mixed tauDeltaT, mixed noiseAmplitude, const long long* __restrict__ force,
        mixed4* __restrict__ posDelta, const mixed4* __restrict__ velm, const float4* __restrict__ random, unsigned int randomIndex) {
    randomIndex += blockIdx.x*blockDim.x+threadIdx.x;
    const mixed fscale = tauDeltaT/(mixed) 0x100000000;
-    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
+    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
        mixed invMass = velm[index].w;
        if (invMass != 0) {
            posDelta[index].x = fscale*invMass*force[index] + noiseAmplitude*SQRT(invMass)*random[randomIndex].x;
-            posDelta[index].y = fscale*invMass*force[index+PADDED_NUM_ATOMS] + noiseAmplitude*SQRT(invMass)*random[randomIndex].y;
+            posDelta[index].y = fscale*invMass*force[index+paddedNumAtoms] + noiseAmplitude*SQRT(invMass)*random[randomIndex].y;
-            posDelta[index].z = fscale*invMass*force[index+PADDED_NUM_ATOMS*2] + noiseAmplitude*SQRT(invMass)*random[randomIndex].z;
+            posDelta[index].z = fscale*invMass*force[index+paddedNumAtoms*2] + noiseAmplitude*SQRT(invMass)*random[randomIndex].z;
        }
        randomIndex += blockDim.x*gridDim.x;
    }
@@ -21,9 +21,9 @@ extern "C" __global__ void integrateBrownianPart1(mixed tauDeltaT, mixed noiseAm
 * Perform the second step of Brownian integration.
 */
-extern "C" __global__ void integrateBrownianPart2(mixed deltaT, real4* posq, real4* __restrict__ posqCorrection, mixed4* velm, const mixed4* __restrict__ posDelta) {
+extern "C" __global__ void integrateBrownianPart2(int numAtoms, mixed deltaT, real4* posq, real4* __restrict__ posqCorrection, mixed4* velm, const mixed4* __restrict__ posDelta) {
    const mixed oneOverDeltaT = RECIP(deltaT);
-    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
+    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
        if (velm[index].w != 0) {
            mixed4 delta = posDelta[index];
            velm[index].x = oneOverDeltaT*delta.x;