New CUDA platform works on Windows

ca7fd533 · Peter Eastman · cf112a25
Commit ca7fd533 authored Jul 03, 2012 by Peter Eastman
5 changed files
--- a/platforms/cuda2/src/CudaArray.h
+++ b/platforms/cuda2/src/CudaArray.h
@@ -28,6 +28,7 @@
 * -------------------------------------------------------------------------- */
 #include "openmm/OpenMMException.h"
+#include "openmm/internal/windowsExport.h"
 #include <cuda.h>
 #include <iostream>
 #include <sstream>
@@ -42,7 +43,7 @@ class CudaContext;
 * for working with it and for copying data to and from device memory.
 */
-class CudaArray {
+class OPENMM_EXPORT CudaArray {
 public:
    /**
     * Create a CudaArray object.  The object is allocated on the heap with the "new" operator.

--- a/platforms/cuda2/src/CudaBondedUtilities.cpp
+++ b/platforms/cuda2/src/CudaBondedUtilities.cpp
@@ -79,14 +79,13 @@ void CudaBondedUtilities::initialize(const System& system) {
        int startAtom = 0;
        while (startAtom < numAtoms) {
            int width = min(numAtoms-startAtom, 4);
-            if (width == 3)
+			int paddedWidth = (width == 3 ? 4 : width);
-                width = 4;
+            vector<unsigned int> indexVec(paddedWidth*numBonds);
-            vector<unsigned int> indexVec(width*numBonds);
            for (int bond = 0; bond < numBonds; bond++) {
                for (int atom = 0; atom < width; atom++)
-                    indexVec[bond*width+atom] = forceAtoms[i][bond][startAtom+atom];
+                    indexVec[bond*paddedWidth+atom] = forceAtoms[i][bond][startAtom+atom];
            }
-            CudaArray* indices = new CudaArray(context, numBonds, 4*width, "bondedIndices");
+            CudaArray* indices = new CudaArray(context, numBonds, 4*paddedWidth, "bondedIndices");
            indices->upload(&indexVec[0]);
            atomIndices[i].push_back(indices);
            startAtom += width;

--- a/platforms/cuda2/src/CudaContext.cpp
+++ b/platforms/cuda2/src/CudaContext.cpp
@@ -328,10 +328,11 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
    out << src.str();
    out.close();
 #ifdef WIN32
+    string command = ""+compiler+" --ptx -arch=compute_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
 #else
    string command = "\""+compiler+"\" --ptx -arch=compute_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
-    int res = std::system(command.c_str());
 #endif
+    int res = std::system(command.c_str());
    try {
        if (res != 0) {
            // Load the error log.

--- a/platforms/cuda2/src/CudaKernels.cpp
+++ b/platforms/cuda2/src/CudaKernels.cpp
@@ -1411,6 +1411,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
        replacements["KMAX_Z"] = cu.intToString(kmaxz);
        replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
        replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
+        replacements["M_PI"] = cu.doubleToString(M_PI);
        CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements);
        ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
        ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
@@ -1437,6 +1438,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
        pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
        pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
        pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
+        pmeDefines["M_PI"] = cu.doubleToString(M_PI);
        if (cu.getUseDoublePrecision())
            pmeDefines["USE_DOUBLE_PRECISION"] = "1";
        CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);

--- a/platforms/cuda2/src/CudaPlatform.cpp
+++ b/platforms/cuda2/src/CudaPlatform.cpp
@@ -36,7 +36,9 @@
 #include <cctype>
 #include <sstream>
 #include <cstdio>
+#ifdef _MSC_VER
+    #include <Windows.h>
+#endif
 using namespace OpenMM;
 using namespace std;
@@ -84,7 +86,15 @@ CudaPlatform::CudaPlatform() {
    setPropertyDefaultValue(CudaUseBlockingSync(), "true");
    setPropertyDefaultValue(CudaPrecision(), "single");
 #ifdef _MSC_VER
-    setPropertyDefaultValue(CudaCompiler(), "nvcc");
+    char* bindir = getenv("CUDA_BIN_PATH");
+	string nvcc = (bindir == NULL ? "nvcc.exe" : string(bindir)+"\\nvcc.exe");
+    int length = GetShortPathName(nvcc.c_str(), NULL, 0);
+	if (length > 0) {
+		vector<char> shortName(length);
+	    GetShortPathName(nvcc.c_str(), &shortName[0], length);
+		nvcc = string(&shortName[0]);
+	}
+    setPropertyDefaultValue(CudaCompiler(), nvcc);
    setPropertyDefaultValue(CudaTempDirectory(), string(getenv("TEMP")));
 #else
    setPropertyDefaultValue(CudaCompiler(), "/usr/local/cuda/bin/nvcc");