Commit 978e3a08 authored by Peter Eastman's avatar Peter Eastman
Browse files

Prefer nvcc over runtime compiler

parent 1f7866ad
...@@ -611,7 +611,7 @@ private: ...@@ -611,7 +611,7 @@ private:
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, forcesValid; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, forcesValid;
std::string compiler, tempDir, cacheDir, gpuArchitecture; std::string compiler, tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize; double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
......
...@@ -73,10 +73,43 @@ const int CudaContext::ThreadBlockSize = 64; ...@@ -73,10 +73,43 @@ const int CudaContext::ThreadBlockSize = 64;
const int CudaContext::TileSize = sizeof(tileflags)*8; const int CudaContext::TileSize = sizeof(tileflags)*8;
bool CudaContext::hasInitializedCuda = false; bool CudaContext::hasInitializedCuda = false;
#ifdef WIN32
#include <Windows.h>
static int executeInWindows(const string &command) {
// COMSPEC is an env variable pointing to full dir of cmd.exe
// it always defined on pretty much all Windows OSes
string fullcommand = getenv("COMSPEC") + string(" /C ") + command;
STARTUPINFO si;
PROCESS_INFORMATION pi;
ZeroMemory( &si, sizeof(si) );
si.cb = sizeof(si);
ZeroMemory( &pi, sizeof(pi) );
vector<char> args(std::max(1000, (int) fullcommand.size()+1));
strcpy(&args[0], fullcommand.c_str());
si.dwFlags = STARTF_USESHOWWINDOW;
si.wShowWindow = SW_HIDE;
if (!CreateProcess(NULL, &args[0], NULL, NULL, FALSE, 0, NULL, NULL, &si, &pi)) {
return -1;
}
WaitForSingleObject(pi.hProcess, INFINITE);
DWORD exitCode = -1;
if(!GetExitCodeProcess(pi.hProcess, &exitCode)) {
throw(OpenMMException("Could not get nvcc.exe's exit code\n"));
} else {
if(exitCode == 0)
return 0;
else
return -1;
}
}
#endif
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0), const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
// Determine what compiler to use.
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) { if (platformData.context != NULL) {
try { try {
...@@ -87,6 +120,24 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -87,6 +120,24 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
// The runtime compiler plugin isn't available. // The runtime compiler plugin isn't available.
} }
} }
#ifdef WIN32
string testCompilerCommand = this->compiler+" --version > nul 2> nul";
int res = executeInWindows(testCompilerCommand.c_str());
#else
string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
int res = std::system(testCompilerCommand.c_str());
#endif
isNvccAvailable = (res == 0);
static bool hasShownNvccWarning = false;
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning) {
hasShownNvccWarning = true;
printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. ");
#ifdef WIN32
printf("Set CUDA_BIN_PATH to specify where nvcc is located.\n");
#else
printf("Set OPENMM_CUDA_COMPILER to specify where nvcc is located.\n");
#endif
}
if (hostCompiler.size() > 0) if (hostCompiler.size() > 0)
this->compiler = compiler+" --compiler-bindir "+hostCompiler; this->compiler = compiler+" --compiler-bindir "+hostCompiler;
if (!hasInitializedCuda) { if (!hasInitializedCuda) {
...@@ -160,6 +211,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -160,6 +211,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
// anything beyond 5.0. // anything beyond 5.0.
if (major == 5) if (major == 5)
minor = 0; minor = 0;
#endif
#if __CUDA_API_VERSION < 8000
// This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3.
if (major == 6) {
major = 5;
minor = 3;
}
#endif #endif
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor; computeCapability = major+0.1*minor;
...@@ -454,37 +514,6 @@ CUmodule CudaContext::createModule(const string source, const char* optimization ...@@ -454,37 +514,6 @@ CUmodule CudaContext::createModule(const string source, const char* optimization
return createModule(source, map<string, string>(), optimizationFlags); return createModule(source, map<string, string>(), optimizationFlags);
} }
#ifdef WIN32
#include <Windows.h>
static bool compileInWindows(const string &command) {
// COMSPEC is an env variable pointing to full dir of cmd.exe
// it always defined on pretty much all Windows OSes
string fullcommand = getenv("COMSPEC") + string(" /C ") + command;
STARTUPINFO si;
PROCESS_INFORMATION pi;
ZeroMemory( &si, sizeof(si) );
si.cb = sizeof(si);
ZeroMemory( &pi, sizeof(pi) );
vector<char> args(std::max(1000, (int) fullcommand.size()+1));
strcpy(&args[0], fullcommand.c_str());
si.dwFlags = STARTF_USESHOWWINDOW;
si.wShowWindow = SW_HIDE;
if (!CreateProcess(NULL, &args[0], NULL, NULL, FALSE, 0, NULL, NULL, &si, &pi)) {
return -1;
}
WaitForSingleObject(pi.hProcess, INFINITE);
DWORD exitCode = -1;
if(!GetExitCodeProcess(pi.hProcess, &exitCode)) {
throw(OpenMMException("Could not get nvcc.exe's exit code\n"));
} else {
if(exitCode == 0)
return 0;
else
return -1;
}
}
#endif
CUmodule CudaContext::createModule(const string source, const map<string, string>& defines, const char* optimizationFlags) { CUmodule CudaContext::createModule(const string source, const map<string, string>& defines, const char* optimizationFlags) {
string bits = intToString(8*sizeof(void*)); string bits = intToString(8*sizeof(void*));
string options = (optimizationFlags == NULL ? defaultOptimizationOptions : string(optimizationFlags)); string options = (optimizationFlags == NULL ? defaultOptimizationOptions : string(optimizationFlags));
...@@ -567,7 +596,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -567,7 +596,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
// If the runtime compiler plugin is available, use it. // If the runtime compiler plugin is available, use it.
if (hasCompilerKernel) { if (hasCompilerKernel && !isNvccAvailable) {
string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this); string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this);
// If possible, write the PTX out to a temporary file so we can cache it for later use. // If possible, write the PTX out to a temporary file so we can cache it for later use.
...@@ -602,7 +631,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -602,7 +631,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
#else #else
string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile; string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#endif #endif
int res = compileInWindows(command); int res = executeInWindows(command);
#else #else
string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\""; string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
res = std::system(command.c_str()); res = std::system(command.c_str());
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment