Prefer nvcc over runtime compiler

978e3a08 · Peter Eastman · 1f7866ad · 978e3a08 · 978e3a08
Commit 978e3a08 authored Aug 05, 2016 by Peter Eastman
Hide whitespace changes
Inline Side-by-side

Showing with 64 additions and 35 deletions

platforms/cuda/include/CudaContext.h platforms/cuda/include/CudaContext.h +1 -1

platforms/cuda/src/CudaContext.cpp platforms/cuda/src/CudaContext.cpp +63 -34

No files found.
--- a/platforms/cuda/include/CudaContext.h
+++ b/platforms/cuda/include/CudaContext.h
@@ -611,7 +611,7 @@ private:
    int paddedNumAtoms;
    int numAtomBlocks;
    int numThreadBlocks;
-    bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, forcesValid;
+    bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, forcesValid;
    std::string compiler, tempDir, cacheDir, gpuArchitecture;
    float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
    double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;

--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp
@@ -73,10 +73,43 @@ const int CudaContext::ThreadBlockSize = 64;
 const int CudaContext::TileSize = sizeof(tileflags)*8;
 bool CudaContext::hasInitializedCuda = false;
+#ifdef WIN32
+#include <Windows.h>
+static int executeInWindows(const string &command) {
+    // COMSPEC is an env variable pointing to full dir of cmd.exe
+    // it always defined on pretty much all Windows OSes
+    string fullcommand = getenv("COMSPEC") + string(" /C ") + command;
+    STARTUPINFO si;
+    PROCESS_INFORMATION pi;
+    ZeroMemory( &si, sizeof(si) );
+    si.cb = sizeof(si);
+    ZeroMemory( &pi, sizeof(pi) );
+    vector<char> args(std::max(1000, (int) fullcommand.size()+1));
+    strcpy(&args[0], fullcommand.c_str());
+    si.dwFlags = STARTF_USESHOWWINDOW;
+    si.wShowWindow = SW_HIDE;
+    if (!CreateProcess(NULL, &args[0], NULL, NULL, FALSE, 0, NULL, NULL, &si, &pi)) {
+        return -1;
+    }
+    WaitForSingleObject(pi.hProcess, INFINITE);
+    DWORD exitCode = -1;
+    if(!GetExitCodeProcess(pi.hProcess, &exitCode)) {
+        throw(OpenMMException("Could not get nvcc.exe's exit code\n"));
+    } else {
+        if(exitCode == 0)
+            return 0;
+        else
+            return -1;
+    }
+}
+#endif
 CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
        const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
-        time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
+        time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false),
        pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
+    // Determine what compiler to use.
    this->compiler = "\""+compiler+"\"";
    if (platformData.context != NULL) {
        try {
@@ -87,6 +120,24 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
            // The runtime compiler plugin isn't available.
        }
    }
+#ifdef WIN32
+    string testCompilerCommand = this->compiler+" --version > nul 2> nul";
+    int res = executeInWindows(testCompilerCommand.c_str());
+#else
+    string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
+    int res = std::system(testCompilerCommand.c_str());
+#endif
+    isNvccAvailable = (res == 0);
+    static bool hasShownNvccWarning = false;
+    if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning) {
+        hasShownNvccWarning = true;
+        printf("Could not find nvcc.  Using runtime compiler, which may produce slower performance.  ");
+#ifdef WIN32
+        printf("Set CUDA_BIN_PATH to specify where nvcc is located.\n");
+#else
+        printf("Set OPENMM_CUDA_COMPILER to specify where nvcc is located.\n");
+#endif
+    }
    if (hostCompiler.size() > 0)
        this->compiler = compiler+" --compiler-bindir "+hostCompiler;
    if (!hasInitializedCuda) {
@@ -160,6 +211,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
        // anything beyond 5.0.
        if (major == 5)
            minor = 0;
+#endif
+#if __CUDA_API_VERSION < 8000
+        // This is a workaround to support Pascal with CUDA 7.5.  It reports
+        // its compute capability as 6.x, but the compiler doesn't support
+        // anything beyond 5.3.
+        if (major == 6) {
+            major = 5;
+            minor = 3;
+        }
 #endif
    gpuArchitecture = intToString(major)+intToString(minor);
    computeCapability = major+0.1*minor;
@@ -454,37 +514,6 @@ CUmodule CudaContext::createModule(const string source, const char* optimization
    return createModule(source, map<string, string>(), optimizationFlags);
 }
-#ifdef WIN32
-#include <Windows.h>
-static bool compileInWindows(const string &command) {
-    // COMSPEC is an env variable pointing to full dir of cmd.exe
-    // it always defined on pretty much all Windows OSes
-    string fullcommand = getenv("COMSPEC") + string(" /C ") + command;
-    STARTUPINFO si;
-    PROCESS_INFORMATION pi;
-    ZeroMemory( &si, sizeof(si) );
-    si.cb = sizeof(si);
-    ZeroMemory( &pi, sizeof(pi) );
-    vector<char> args(std::max(1000, (int) fullcommand.size()+1));
-    strcpy(&args[0], fullcommand.c_str());
-    si.dwFlags = STARTF_USESHOWWINDOW;
-    si.wShowWindow = SW_HIDE;
-    if (!CreateProcess(NULL, &args[0], NULL, NULL, FALSE, 0, NULL, NULL, &si, &pi)) {
-        return -1;
-    }
-    WaitForSingleObject(pi.hProcess, INFINITE);
-    DWORD exitCode = -1;
-    if(!GetExitCodeProcess(pi.hProcess, &exitCode)) {
-        throw(OpenMMException("Could not get nvcc.exe's exit code\n"));
-    } else {
-        if(exitCode == 0)
-            return 0;
-        else
-            return -1;
-    }
-}
-#endif
 CUmodule CudaContext::createModule(const string source, const map<string, string>& defines, const char* optimizationFlags) {
    string bits = intToString(8*sizeof(void*));
    string options = (optimizationFlags == NULL ? defaultOptimizationOptions : string(optimizationFlags));
@@ -567,7 +596,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
    // If the runtime compiler plugin is available, use it.
-    if (hasCompilerKernel) {
+    if (hasCompilerKernel && !isNvccAvailable) {
        string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this);
        // If possible, write the PTX out to a temporary file so we can cache it for later use.
@@ -602,7 +631,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
 #else
        string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
 #endif
-        int res = compileInWindows(command);
+        int res = executeInWindows(command);
 #else
        string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
        res = std::system(command.c_str());