Commit ca7fd533 authored by Peter Eastman
Browse files

New CUDA platform works on Windows

parent cf112a25
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "openmm/internal/windowsExport.h"
#include <cuda.h> #include <cuda.h>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
...@@ -42,7 +43,7 @@ class CudaContext; ...@@ -42,7 +43,7 @@ class CudaContext;
* for working with it and for copying data to and from device memory. * for working with it and for copying data to and from device memory.
*/ */
class CudaArray { class OPENMM_EXPORT CudaArray {
public: public:
/** /**
* Create a CudaArray object. The object is allocated on the heap with the "new" operator. * Create a CudaArray object. The object is allocated on the heap with the "new" operator.
......
...@@ -79,14 +79,13 @@ void CudaBondedUtilities::initialize(const System& system) { ...@@ -79,14 +79,13 @@ void CudaBondedUtilities::initialize(const System& system) {
int startAtom = 0; int startAtom = 0;
while (startAtom < numAtoms) { while (startAtom < numAtoms) {
int width = min(numAtoms-startAtom, 4); int width = min(numAtoms-startAtom, 4);
if (width == 3) int paddedWidth = (width == 3 ? 4 : width);
width = 4; vector<unsigned int> indexVec(paddedWidth*numBonds);
vector<unsigned int> indexVec(width*numBonds);
for (int bond = 0; bond < numBonds; bond++) { for (int bond = 0; bond < numBonds; bond++) {
for (int atom = 0; atom < width; atom++) for (int atom = 0; atom < width; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][startAtom+atom]; indexVec[bond*paddedWidth+atom] = forceAtoms[i][bond][startAtom+atom];
} }
CudaArray* indices = new CudaArray(context, numBonds, 4*width, "bondedIndices"); CudaArray* indices = new CudaArray(context, numBonds, 4*paddedWidth, "bondedIndices");
indices->upload(&indexVec[0]); indices->upload(&indexVec[0]);
atomIndices[i].push_back(indices); atomIndices[i].push_back(indices);
startAtom += width; startAtom += width;
......
...@@ -328,10 +328,11 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -328,10 +328,11 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
out << src.str(); out << src.str();
out.close(); out.close();
#ifdef WIN32 #ifdef WIN32
string command = ""+compiler+" --ptx -arch=compute_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#else #else
string command = "\""+compiler+"\" --ptx -arch=compute_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\""; string command = "\""+compiler+"\" --ptx -arch=compute_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
int res = std::system(command.c_str());
#endif #endif
int res = std::system(command.c_str());
try { try {
if (res != 0) { if (res != 0) {
// Load the error log. // Load the error log.
......
...@@ -1411,6 +1411,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1411,6 +1411,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
replacements["KMAX_Z"] = cu.intToString(kmaxz); replacements["KMAX_Z"] = cu.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha)); replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0); replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
replacements["M_PI"] = cu.doubleToString(M_PI);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements);
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums"); ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces"); ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
...@@ -1437,6 +1438,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1437,6 +1438,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY); pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ); pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0)); pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
pmeDefines["USE_DOUBLE_PRECISION"] = "1"; pmeDefines["USE_DOUBLE_PRECISION"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
......
...@@ -36,7 +36,9 @@ ...@@ -36,7 +36,9 @@
#include <cctype> #include <cctype>
#include <sstream> #include <sstream>
#include <cstdio> #include <cstdio>
#ifdef _MSC_VER
#include <Windows.h>
#endif
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -84,7 +86,15 @@ CudaPlatform::CudaPlatform() { ...@@ -84,7 +86,15 @@ CudaPlatform::CudaPlatform() {
setPropertyDefaultValue(CudaUseBlockingSync(), "true"); setPropertyDefaultValue(CudaUseBlockingSync(), "true");
setPropertyDefaultValue(CudaPrecision(), "single"); setPropertyDefaultValue(CudaPrecision(), "single");
#ifdef _MSC_VER #ifdef _MSC_VER
setPropertyDefaultValue(CudaCompiler(), "nvcc"); char* bindir = getenv("CUDA_BIN_PATH");
string nvcc = (bindir == NULL ? "nvcc.exe" : string(bindir)+"\\nvcc.exe");
int length = GetShortPathName(nvcc.c_str(), NULL, 0);
if (length > 0) {
vector<char> shortName(length);
GetShortPathName(nvcc.c_str(), &shortName[0], length);
nvcc = string(&shortName[0]);
}
setPropertyDefaultValue(CudaCompiler(), nvcc);
setPropertyDefaultValue(CudaTempDirectory(), string(getenv("TEMP"))); setPropertyDefaultValue(CudaTempDirectory(), string(getenv("TEMP")));
#else #else
setPropertyDefaultValue(CudaCompiler(), "/usr/local/cuda/bin/nvcc"); setPropertyDefaultValue(CudaCompiler(), "/usr/local/cuda/bin/nvcc");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment