Commit 208d5240 authored by ChayaSt's avatar ChayaSt
Browse files

Merge branch 'master' of https://github.com/pandegroup/openmm into nbfix

parents 79e76a4e 20af24c4
...@@ -17,6 +17,7 @@ env: ...@@ -17,6 +17,7 @@ env:
matrix: matrix:
include: include:
- sudo: required - sudo: required
dist: trusty
env: ==CPU_OPENCL== env: ==CPU_OPENCL==
OPENCL=true OPENCL=true
CUDA=false CUDA=false
...@@ -33,7 +34,9 @@ matrix: ...@@ -33,7 +34,9 @@ matrix:
-DOPENMM_BUILD_AMOEBA_PLUGIN=OFF -DOPENMM_BUILD_AMOEBA_PLUGIN=OFF
-DOPENMM_BUILD_PYTHON_WRAPPERS=OFF -DOPENMM_BUILD_PYTHON_WRAPPERS=OFF
-DOPENMM_BUILD_C_AND_FORTRAN_WRAPPERS=OFF -DOPENMM_BUILD_C_AND_FORTRAN_WRAPPERS=OFF
-DOPENMM_BUILD_EXAMPLES=OFF" -DOPENMM_BUILD_EXAMPLES=OFF
-DOPENCL_INCLUDE_DIR=$HOME/AMDAPPSDK/include
-DOPENCL_LIBRARY=$HOME/AMDAPPSDK/lib/x86_64/libOpenCL.so"
addons: {apt: {packages: []}} addons: {apt: {packages: []}}
- sudo: required - sudo: required
...@@ -107,9 +110,16 @@ before_install: ...@@ -107,9 +110,16 @@ before_install:
sudo easy_install pytest; sudo easy_install pytest;
fi fi
- if [[ "$OPENCL" == "true" ]]; then - if [[ "$OPENCL" == "true" ]]; then
sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main universe restricted multiverse"; wget https://jenkins.choderalab.org/userContent/AMD-APP-SDKInstaller-v3.0.130.135-GA-linux64.tar.bz2;
sudo apt-get -yq update > /dev/null 2>&1 ; tar -xjf AMD-APP-SDK*.tar.bz2;
sudo apt-get install -qq fglrx=2:8.960-0ubuntu1 opencl-headers; AMDAPPSDK=${HOME}/AMDAPPSDK;
export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors;
mkdir -p ${OPENCL_VENDOR_PATH};
sh AMD-APP-SDK*.sh --tar -xf -C ${AMDAPPSDK};
echo libamdocl64.so > ${OPENCL_VENDOR_PATH}/amdocl64.icd;
export LD_LIBRARY_PATH=${AMDAPPSDK}/lib/x86_64:${LD_LIBRARY_PATH};
chmod +x ${AMDAPPSDK}/bin/x86_64/clinfo;
${AMDAPPSDK}/bin/x86_64/clinfo;
fi fi
# Install swig for Python wrappers. However, testing CUDA and OpenCL, we # Install swig for Python wrappers. However, testing CUDA and OpenCL, we
# skip the Python wrapper for speed. We're not using anaconda python, # skip the Python wrapper for speed. We're not using anaconda python,
......
...@@ -19,13 +19,14 @@ CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$INSTALL" ...@@ -19,13 +19,14 @@ CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$INSTALL"
# setting the rpath so that libOpenMMPME.so finds the right libfftw3 # setting the rpath so that libOpenMMPME.so finds the right libfftw3
#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.." #CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++" CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++"
CMAKE_FLAGS+=" -DCUDA_CUDART_LIBRARY=/usr/local/cuda-7.0/lib64/libcudart.so" CMAKE_FLAGS+=" -DCUDA_CUDART_LIBRARY=/usr/local/cuda-7.5/lib64/libcudart.so"
CMAKE_FLAGS+=" -DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-7.0/bin/nvcc" CMAKE_FLAGS+=" -DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-7.5/bin/nvcc"
CMAKE_FLAGS+=" -DCUDA_SDK_ROOT_DIR=/usr/local/cuda-7.0/" CMAKE_FLAGS+=" -DCUDA_SDK_ROOT_DIR=/usr/local/cuda-7.5/"
CMAKE_FLAGS+=" -DCUDA_TOOLKIT_INCLUDE=/usr/local/cuda-7.0/include" CMAKE_FLAGS+=" -DCUDA_TOOLKIT_INCLUDE=/usr/local/cuda-7.5/include"
CMAKE_FLAGS+=" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-7.0/" CMAKE_FLAGS+=" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-7.5/"
CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/opt/AMDAPPSDK-2.9-1/include/" CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/opt/AMDAPPSDK-3.0/include/"
CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/opt/AMDAPPSDK-2.9-1/lib/x86_64/libOpenCL.so" CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/opt/AMDAPPSDK-3.0/lib/x86_64/libOpenCL.so"
CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
# Set location for FFTW3 # Set location for FFTW3
PREFIX="$WORKSPACE/miniconda" PREFIX="$WORKSPACE/miniconda"
...@@ -40,7 +41,8 @@ fi ...@@ -40,7 +41,8 @@ fi
mkdir build mkdir build
cd build cd build
cmake ../openmm $CMAKE_FLAGS cmake ../openmm $CMAKE_FLAGS
make -j4 all DoxygenApiDocs sphinxpdf make -j4 all install
make -j4 PythonInstall C++ApiDocs PythonApiDocs sphinxpdf
# Install. # Install.
make install make install
#!/bin/tcsh #!/bin/bash
# Prepare for build by ensuring necessary prerequisites are locally installed. # Prepare for build by ensuring necessary prerequisites are locally installed.
...@@ -6,16 +6,16 @@ ...@@ -6,16 +6,16 @@
export WORKSPACE=`pwd` export WORKSPACE=`pwd`
# Install miniconda # Install miniconda
export VERSION="Latest" export VERSION="latest"
export PLATFORM="Linux" export PLATFORM="Linux"
export ARCH="x86_64" export ARCH="x86_64"
export MINICONDA="Miniconda-$VERSION-$PLATFORM-$ARCH.sh" export MINICONDA="Miniconda2-$VERSION-$PLATFORM-$ARCH.sh"
if [ -f miniconda ]; if [ -f miniconda ];
then then
echo "miniconda already exists" echo "miniconda already exists"
else else
echo "Downloading miniconda..." echo "Downloading miniconda..."
rm -rf Miniconda-* rm -rf Miniconda-* miniconda ~/.condarc
wget --quiet http://repo.continuum.io/miniconda/${MINICONDA} wget --quiet http://repo.continuum.io/miniconda/${MINICONDA}
bash ${MINICONDA} -b -p miniconda bash ${MINICONDA} -b -p miniconda
PIP_ARGS="-U" PIP_ARGS="-U"
...@@ -25,6 +25,6 @@ fi ...@@ -25,6 +25,6 @@ fi
export PATH=$WORKSPACE/miniconda/bin:$PATH export PATH=$WORKSPACE/miniconda/bin:$PATH
# Ensure configuration is up to date. # Ensure configuration is up to date.
conda config --add channels http://conda.binstar.org/omnia conda config --add channels omnia
conda install --yes --quiet swig fftw3f pip conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
pip install sphinxcontrib-bibtex
...@@ -22,6 +22,7 @@ CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$INSTALL" ...@@ -22,6 +22,7 @@ CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$INSTALL"
CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++" CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++"
CMAKE_FLAGS+=" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.9" CMAKE_FLAGS+=" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.9"
CMAKE_FLAGS+=" -DCMAKE_OSX_SYSROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk" CMAKE_FLAGS+=" -DCMAKE_OSX_SYSROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk"
CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
# Build in subdirectory. # Build in subdirectory.
# Set location for FFTW3 # Set location for FFTW3
...@@ -37,7 +38,8 @@ fi ...@@ -37,7 +38,8 @@ fi
mkdir build mkdir build
cd build cd build
cmake ../openmm $CMAKE_FLAGS cmake ../openmm $CMAKE_FLAGS
make -j4 all DoxygenApiDocs sphinxpdf make -j4 all install
make -j4 PythonInstall C++ApiDocs PythonApiDocs sphinxpdf
# Install. # Install.
make install make install
...@@ -27,4 +27,4 @@ export PATH=$WORKSPACE/miniconda/bin:$PATH ...@@ -27,4 +27,4 @@ export PATH=$WORKSPACE/miniconda/bin:$PATH
# Ensure configuration is up to date. # Ensure configuration is up to date.
conda config --add channels http://conda.binstar.org/omnia conda config --add channels http://conda.binstar.org/omnia
conda install --yes --quiet swig fftw3f pip conda install --yes --quiet swig fftw3f pip
pip install sphinxcontrib-bibtex pip install sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen
...@@ -28,6 +28,7 @@ CMAKE_FLAGS+=" -DOPENMM_BUILD_DRUDE_OPENCL_LIB=OFF" ...@@ -28,6 +28,7 @@ CMAKE_FLAGS+=" -DOPENMM_BUILD_DRUDE_OPENCL_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_OPENCL_LIB=OFF" CMAKE_FLAGS+=" -DOPENMM_BUILD_OPENCL_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_RPMD_CUDA_LIB=OFF" CMAKE_FLAGS+=" -DOPENMM_BUILD_RPMD_CUDA_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_RPMD_OPENCL_LIB=OFF" CMAKE_FLAGS+=" -DOPENMM_BUILD_RPMD_OPENCL_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_GENERATE_API_DOCS=ON"
# Set location for FFTW3 # Set location for FFTW3
#PREFIX="$WORKSPACE/miniconda" #PREFIX="$WORKSPACE/miniconda"
...@@ -42,7 +43,8 @@ fi ...@@ -42,7 +43,8 @@ fi
mkdir build mkdir build
cd build cd build
cmake ../openmm $CMAKE_FLAGS cmake ../openmm $CMAKE_FLAGS
make -j4 all DoxygenApiDocs sphinxpdf make -j4 all install
make -j4 PythonInstall C++ApiDocs PythonApiDocs sphinxpdf
# Install. # Install.
make install make install
#!/bin/tcsh #!/bin/bash
# Prepare for build by ensuring necessary prerequisites are locally installed. # Prepare for build by ensuring necessary prerequisites are locally installed.
# Set relative workspace path. # Set relative workspace path.
export WORKSPACE=`pwd` export WORKSPACE=`pwd`
# Install miniconda # Install miniconda
export VERSION="Latest" export VERSION="latest"
export PLATFORM="Linux" export PLATFORM="Linux"
export ARCH="x86_64" export ARCH="x86_64"
export MINICONDA="Miniconda-$VERSION-$PLATFORM-$ARCH.sh" export MINICONDA="Miniconda2-$VERSION-$PLATFORM-$ARCH.sh"
if [ -f miniconda ]; if [ -f miniconda ];
then then
echo "miniconda already exists" echo "miniconda already exists"
else else
echo "Downloading miniconda..." echo "Downloading miniconda..."
rm -rf Miniconda-* rm -rf Miniconda-* miniconda ~/.condarc
wget --quiet http://repo.continuum.io/miniconda/${MINICONDA} wget --quiet http://repo.continuum.io/miniconda/${MINICONDA}
bash ${MINICONDA} -b -p miniconda bash ${MINICONDA} -b -p miniconda
PIP_ARGS="-U" PIP_ARGS="-U"
...@@ -25,6 +26,6 @@ fi ...@@ -25,6 +26,6 @@ fi
export PATH=$WORKSPACE/miniconda/bin:$PATH export PATH=$WORKSPACE/miniconda/bin:$PATH
# Ensure configuration is up to date. # Ensure configuration is up to date.
conda config --add channels http://conda.binstar.org/omnia conda config --add channels omnia
conda install --yes --quiet swig fftw3f pip conda install --yes --quiet swig fftw3f pip doxygen sphinx sphinxcontrib-bibtex sphinxcontrib-lunrsearch sphinxcontrib-autodoc_doxygen lxml cmake
pip install sphinxcontrib-bibtex
...@@ -2070,7 +2070,7 @@ Missing residue templates ...@@ -2070,7 +2070,7 @@ Missing residue templates
.. CAUTION:: .. CAUTION::
These features are experimental, and their API is subject to change. These features are experimental, and their API is subject to change.
You can use the :method:`getUnmatchedResidues()` method to get a list of residues You can use the :meth:`getUnmatchedResidues()` method to get a list of residues
in the provided :code:`topology` object that do not currently have a matching in the provided :code:`topology` object that do not currently have a matching
residue template defined in the :class:`ForceField`. residue template defined in the :class:`ForceField`.
:: ::
...@@ -2084,7 +2084,7 @@ with residue template definitions, or identifying which additional residues need ...@@ -2084,7 +2084,7 @@ with residue template definitions, or identifying which additional residues need
to be parameterized. to be parameterized.
As a convenience for parameterizing new residues, you can also get a list of As a convenience for parameterizing new residues, you can also get a list of
residues and empty residue templates using :method:`generateTemplatesForUnmatchedResidues` residues and empty residue templates using :meth:`generateTemplatesForUnmatchedResidues`
:: ::
pdb = PDBFile('input.pdb') pdb = PDBFile('input.pdb')
...@@ -2098,7 +2098,7 @@ residues and empty residue templates using :method:`generateTemplatesForUnmatche ...@@ -2098,7 +2098,7 @@ residues and empty residue templates using :method:`generateTemplatesForUnmatche
forcefield.registerResidueTemplate(template) forcefield.registerResidueTemplate(template)
If you find that templates seem to be incorrectly matched, another useful If you find that templates seem to be incorrectly matched, another useful
function :method:`getMatchingTemplates()` can help you identify which templates function :meth:`getMatchingTemplates()` can help you identify which templates
are being matched: are being matched:
:: ::
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
// This file is part of the "Irrlicht Engine" and the "irrXML" project. // This file is part of the "Irrlicht Engine" and the "irrXML" project.
// For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
// MODIFIED by Peter Eastman, Feb. 4, 2016, to support numeric escape sequences
#ifndef __ICXML_READER_IMPL_H_INCLUDED__ #ifndef __ICXML_READER_IMPL_H_INCLUDED__
#define __ICXML_READER_IMPL_H_INCLUDED__ #define __ICXML_READER_IMPL_H_INCLUDED__
...@@ -529,10 +531,37 @@ private: ...@@ -529,10 +531,37 @@ private:
pos += SpecialCharacters[specialChar].size(); pos += SpecialCharacters[specialChar].size();
} }
else else
{
int semicolonPos = origstr.findNext(L';', pos);
if (semicolonPos != -1 && origstr.c_str()[pos+1] == L'#')
{
// it is a numeric character reference
int number;
core::string<char> numberString;
if (origstr.c_str()[pos+2] == L'x')
{
// hex value
for (int i=pos+3; i<semicolonPos; ++i)
numberString.append((char) origstr[i]);
sscanf(numberString.c_str(), "%x", &number);
}
else
{
// decimal value
for (int i=pos+2; i<semicolonPos; ++i)
numberString.append((char) origstr[i]);
sscanf(numberString.c_str(), "%d", &number);
}
newstr.append(origstr.subString(oldPos, pos - oldPos));
newstr.append((char_type) number);
pos = semicolonPos+1;
}
else
{ {
newstr.append(origstr.subString(oldPos, pos - oldPos + 1)); newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
pos += 1; pos += 1;
} }
}
// find next & // find next &
oldPos = pos; oldPos = pos;
......
#ifndef OPENMM_TIMER_H_
#define OPENMM_TIMER_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This header provides a static function for querying the current system time in seconds.
* It is useful when profiling.
*/
#ifdef _MSC_VER
#include <Windows.h>
/**
 * Return the current system time in seconds (Windows implementation).
 *
 * The value comes from GetSystemTimeAsFileTime(), which reports a count of
 * 100-nanosecond intervals since January 1, 1601.
 */
static double getCurrentTime() {
    FILETIME fileTime;
    GetSystemTimeAsFileTime(&fileTime);

    // Combine the two 32-bit halves into a single 64-bit tick count.
    ULARGE_INTEGER stamp;
    stamp.HighPart = fileTime.dwHighDateTime;
    stamp.LowPart = fileTime.dwLowDateTime;

    // Each tick is 100 ns, i.e. 1e-7 seconds.
    const double ticks = static_cast<double>(stamp.QuadPart);
    return 1e-7*ticks;
}
#else
#include <sys/time.h>
/**
 * Return the current system time in seconds (non-Windows implementation).
 *
 * The value is the whole-second and microsecond fields reported by
 * gettimeofday(), combined into a single floating point number.
 */
static double getCurrentTime() {
    struct timeval now;
    gettimeofday(&now, 0); // no timezone information is requested

    const double wholeSeconds = now.tv_sec;
    const double fraction = 1e-6*now.tv_usec; // microseconds -> seconds
    return wholeSeconds+fraction;
}
#endif
#endif /*OPENMM_TIMER_H_*/
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. * * Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -192,6 +192,7 @@ public: ...@@ -192,6 +192,7 @@ public:
*/ */
void loadCheckpoint(ContextImpl& context, std::istream& stream); void loadCheckpoint(ContextImpl& context, std::istream& stream);
private: private:
class GetPositionsTask;
CudaContext& cu; CudaContext& cu;
}; };
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "openmm/Platform.h" #include "openmm/Platform.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportCuda.h" #include "windowsExportCuda.h"
namespace OpenMM { namespace OpenMM {
...@@ -122,7 +123,7 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData { ...@@ -122,7 +123,7 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public: public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty, PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty, const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty,
const std::string& pmeStreamProperty); const std::string& pmeStreamProperty, int numThreads);
~PlatformData(); ~PlatformData();
void initializeContexts(const System& system); void initializeContexts(const System& system);
void syncContexts(); void syncContexts();
...@@ -134,6 +135,7 @@ public: ...@@ -134,6 +135,7 @@ public:
int stepCount, computeForceCount; int stepCount, computeForceCount;
double time; double time;
std::map<std::string, std::string> propertyValues; std::map<std::string, std::string> propertyValues;
ThreadPool threads;
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -351,7 +351,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -351,7 +351,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
break; break;
} }
case Operation::POWER: case Operation::POWER:
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")"; out << "pow((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
break; break;
case Operation::NEGATE: case Operation::NEGATE:
out << "-" << getTempName(node.getChildren()[0], temps); out << "-" << getTempName(node.getChildren()[0], temps);
...@@ -488,14 +488,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -488,14 +488,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "}"; out << "}";
} }
else else
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << context.doubleToString(exponent) << ")"; out << "pow((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << context.doubleToString(exponent) << ")";
break; break;
} }
case Operation::MIN: case Operation::MIN:
out << "min(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")"; out << "min((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
break; break;
case Operation::MAX: case Operation::MAX:
out << "max(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")"; out << "max((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
break; break;
case Operation::ABS: case Operation::ABS:
out << "fabs(" << getTempName(node.getChildren()[0], temps) << ")"; out << "fabs(" << getTempName(node.getChildren()[0], temps) << ")";
......
...@@ -141,17 +141,23 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) { ...@@ -141,17 +141,23 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts[i]->setTime(time); contexts[i]->setTime(time);
} }
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) { class CudaUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
cu.setAsCurrent(); public:
GetPositionsTask(CudaContext& cu, vector<Vec3>& positions, vector<float4>& posCorrection) : cu(cu), positions(positions), posCorrection(posCorrection) {
}
void execute(ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = cu.getNumAtoms();
positions.resize(numParticles);
Vec3 boxVectors[3]; Vec3 boxVectors[3];
cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]); cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer(); double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq); for (int i = start; i < end; ++i) {
for (int i = 0; i < numParticles; ++i) {
double4 pos = posq[i]; double4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i]; int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z; positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
...@@ -159,10 +165,7 @@ void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& ...@@ -159,10 +165,7 @@ void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>&
} }
else if (cu.getUseMixedPrecision()) { else if (cu.getUseMixedPrecision()) {
float4* posq = (float4*) cu.getPinnedBuffer(); float4* posq = (float4*) cu.getPinnedBuffer();
vector<float4> posCorrection; for (int i = start; i < end; ++i) {
cu.getPosq().download(posq);
cu.getPosqCorrection().download(posCorrection);
for (int i = 0; i < numParticles; ++i) {
float4 pos1 = posq[i]; float4 pos1 = posq[i];
float4 pos2 = posCorrection[i]; float4 pos2 = posCorrection[i];
int4 offset = cu.getPosCellOffsets()[i]; int4 offset = cu.getPosCellOffsets()[i];
...@@ -171,13 +174,43 @@ void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& ...@@ -171,13 +174,43 @@ void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>&
} }
else { else {
float4* posq = (float4*) cu.getPinnedBuffer(); float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq); for (int i = start; i < end; ++i) {
for (int i = 0; i < numParticles; ++i) {
float4 pos = posq[i]; float4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i]; int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z; positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
} }
} }
}
CudaContext& cu;
vector<Vec3>& positions;
vector<float4>& posCorrection;
};
/**
 * Retrieve the current particle positions into the caller's vector.
 *
 * The raw position data is downloaded into the CudaContext's pinned buffer on the
 * calling thread. A GetPositionsTask is then handed to the platform's thread pool to
 * fill in the output vector, and waitForThreads() is called before returning.
 *
 * @param context    the context whose System supplies the particle count
 * @param positions  output vector; resized to one entry per particle
 */
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
// NOTE(review): presumably makes this CUDA context current on the calling thread - confirm.
cu.setAsCurrent();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
// Only filled in the mixed precision branch below; it stays empty otherwise.
vector<float4> posCorrection;
if (cu.getUseDoublePrecision()) {
// Double precision: the pinned buffer is treated as double4 elements.
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
else if (cu.getUseMixedPrecision()) {
// Mixed precision: download both the float4 positions and the separate correction array.
float4* posq = (float4*) cu.getPinnedBuffer();
// NOTE(review): the extra 'false' argument presumably requests a non-blocking copy,
// with the download that follows acting as the synchronization point - confirm
// against CudaArray::download().
cu.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cu.getPosqCorrection().download(posCorrection);
}
else {
// Single precision: the pinned buffer is treated as float4 elements.
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask task(cu, positions, posCorrection);
cu.getPlatformData().threads.execute(task);
// Wait for the thread pool before returning; 'task' refers to locals of this function.
cu.getPlatformData().threads.waitForThreads();
} }
void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) { void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) {
...@@ -6628,12 +6661,12 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -6628,12 +6661,12 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) { if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double value; double value;
summedValue->download(&value); summedValue->download(&value);
globalValuesDouble[stepTarget[step].variableIndex] = value; recordGlobalValue(value, stepTarget[step]);
} }
else { else {
float value; float value;
summedValue->download(&value); summedValue->download(&value);
globalValuesDouble[stepTarget[step].variableIndex] = value; recordGlobalValue(value, stepTarget[step]);
} }
} }
else if (stepType[step] == CustomIntegrator::UpdateContextState) { else if (stepType[step] == CustomIntegrator::UpdateContextState) {
...@@ -6742,6 +6775,7 @@ void CudaIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarget ...@@ -6742,6 +6775,7 @@ void CudaIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarget
case DT: case DT:
if (value != globalValuesDouble[dtVariableIndex]) if (value != globalValuesDouble[dtVariableIndex])
deviceGlobalsAreCurrent = false; deviceGlobalsAreCurrent = false;
expressionSet.setVariable(dtVariableIndex, value);
globalValuesDouble[dtVariableIndex] = value; globalValuesDouble[dtVariableIndex] = value;
cu.getIntegrationUtilities().setNextStepSize(value); cu.getIntegrationUtilities().setNextStepSize(value);
break; break;
......
...@@ -29,9 +29,10 @@ ...@@ -29,9 +29,10 @@
#include "CudaPlatform.h" #include "CudaPlatform.h"
#include "CudaKernelFactory.h" #include "CudaKernelFactory.h"
#include "CudaKernels.h" #include "CudaKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h" #include "openmm/Context.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
#include <algorithm> #include <algorithm>
#include <cctype> #include <cctype>
#include <sstream> #include <sstream>
...@@ -175,7 +176,11 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string ...@@ -175,7 +176,11 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name()); pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName)) if (!supportsKernels(pmeKernelName))
cpuPmePropValue = "false"; cpuPmePropValue = "false";
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, hostCompilerPropValue, pmeStreamPropValue)); int threads = getNumProcessors();
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, hostCompilerPropValue, pmeStreamPropValue, threads));
} }
void CudaPlatform::contextDestroyed(ContextImpl& context) const { void CudaPlatform::contextDestroyed(ContextImpl& context) const {
...@@ -184,7 +189,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -184,7 +189,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
} }
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty, CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false) { const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty, int numThreads) :
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
bool blocking = (blockingProperty == "true"); bool blocking = (blockingProperty == "true");
vector<string> devices; vector<string> devices;
size_t searchPos = 0, nextPos; size_t searchPos = 0, nextPos;
......
...@@ -59,7 +59,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) { ...@@ -59,7 +59,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
/** /**
* Determine whether a particular interaction is in the list of exclusions. * Determine whether a particular interaction is in the list of exclusions.
*/ */
inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex) { inline __device__ bool isInteractionExcluded(int atom1, int atom2, const int* __restrict__ exclusions, const int* __restrict__ exclusionStartIndex) {
int first = exclusionStartIndex[atom1]; int first = exclusionStartIndex[atom1];
int last = exclusionStartIndex[atom1+1]; int last = exclusionStartIndex[atom1+1];
for (int i = last-1; i >= first; i--) { for (int i = last-1; i >= first; i--) {
...@@ -180,7 +180,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi ...@@ -180,7 +180,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
const real4* __restrict__ posq, const real4* __restrict__ blockCenter, const real4* __restrict__ blockBoundingBox, int2* __restrict__ neighborPairs, const real4* __restrict__ posq, const real4* __restrict__ blockCenter, const real4* __restrict__ blockBoundingBox, int2* __restrict__ neighborPairs,
int* __restrict__ numNeighborPairs, int* __restrict__ numNeighborsForAtom, int maxNeighborPairs int* __restrict__ numNeighborPairs, int* __restrict__ numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
, int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex , const int* __restrict__ exclusions, const int* __restrict__ exclusionStartIndex
#endif #endif
) { ) {
__shared__ real3 positionCache[FIND_NEIGHBORS_WORKGROUP_SIZE]; __shared__ real3 positionCache[FIND_NEIGHBORS_WORKGROUP_SIZE];
...@@ -265,6 +265,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi ...@@ -265,6 +265,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
} }
} }
} }
if (atom1 < NUM_ATOMS)
numNeighborsForAtom[atom1] = totalNeighborsForAtom1; numNeighborsForAtom[atom1] = totalNeighborsForAtom1;
} }
} }
...@@ -308,6 +309,7 @@ extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeig ...@@ -308,6 +309,7 @@ extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeig
numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter
} }
globalOffset += posBuffer[blockDim.x-1]; globalOffset += posBuffer[blockDim.x-1];
__syncthreads();
} }
if (threadIdx.x == 0) if (threadIdx.x == 0)
neighborStartIndex[0] = 0; neighborStartIndex[0] = 0;
......
...@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { ...@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system.addParticle(0.0); system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()), platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream())); platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), 1);
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
......
...@@ -56,7 +56,7 @@ void testGaussian() { ...@@ -56,7 +56,7 @@ void testGaussian() {
system.addParticle(1.0); system.addParticle(1.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()), platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream())); platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), 1);
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
...@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) { ...@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system.addParticle(0.0); system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()), platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream())); platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), 1);
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
CudaArray data(context, array.size(), 4, "sortData"); CudaArray data(context, array.size(), 4, "sortData");
......
...@@ -74,7 +74,7 @@ public: ...@@ -74,7 +74,7 @@ public:
*/ */
std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables, std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames, const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
const std::string& prefix, const std::string& tempType="float"); const std::string& prefix, const std::string& tempType="real");
/** /**
* Calculate the spline coefficients for a tabulated function that appears in expressions. * Calculate the spline coefficients for a tabulated function that appears in expressions.
* *
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment