Unverified commit 483239ef, authored by Peter Eastman, committed by GitHub
Browse files

Improve selection of architecture to compile for (#3017)

* Improve selection of architecture to compile for

* Fixed compilation errors on older CUDA versions
parent ebef35a4
...@@ -530,9 +530,10 @@ private: ...@@ -530,9 +530,10 @@ private:
int contextIndex; int contextIndex;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
int gpuArchitecture;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, hasAssignedPosqCharges; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, hasAssignedPosqCharges;
bool isLinkedContext; bool isLinkedContext;
std::string compiler, tempDir, cacheDir, gpuArchitecture; std::string compiler, tempDir, cacheDir;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize; double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. * * Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -62,6 +62,10 @@ public: ...@@ -62,6 +62,10 @@ public:
* @param cu the CudaContext for which the kernel is being compiled * @param cu the CudaContext for which the kernel is being compiled
*/ */
virtual std::string createModule(const std::string& source, const std::string& flags, CudaContext& cu) = 0; virtual std::string createModule(const std::string& source, const std::string& flags, CudaContext& cu) = 0;
/**
 * Get the maximum architecture version the compiler supports.
 *
 * @return the newest architecture the compiler can target, as an integer on
 *         the same scale as CudaContext's gpuArchitecture (10*major + minor,
 *         e.g. 75 for compute capability 7.5). CudaContext::createModule()
 *         takes the minimum of this value and the device's architecture to
 *         decide what to compile for.
 */
virtual int getMaxSupportedArchitecture() const = 0;
}; };
/** /**
......
...@@ -227,7 +227,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -227,7 +227,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
minor = 3; minor = 3;
} }
} }
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = 10*major+minor;
computeCapability = major+0.1*minor; computeCapability = major+0.1*minor;
contextIsValid = true; contextIsValid = true;
...@@ -532,6 +532,16 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -532,6 +532,16 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
src << endl; src << endl;
src << source << endl; src << source << endl;
// Determine what architecture to compile for.
string compileArchitecture;
if (hasCompilerKernel) {
int maxCompilerArchitecture = compilerKernel.getAs<CudaCompilerKernel>().getMaxSupportedArchitecture();
compileArchitecture = intToString(min(gpuArchitecture, maxCompilerArchitecture));
}
else
compileArchitecture = intToString(gpuArchitecture);
// See whether we already have PTX for this kernel cached. // See whether we already have PTX for this kernel cached.
CSHA1 sha1; CSHA1 sha1;
...@@ -544,7 +554,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -544,7 +554,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
cacheFile.flags(ios::hex); cacheFile.flags(ios::hex);
for (int i = 0; i < 20; i++) for (int i = 0; i < 20; i++)
cacheFile << setw(2) << setfill('0') << (int) hash[i]; cacheFile << setw(2) << setfill('0') << (int) hash[i];
cacheFile << '_' << gpuArchitecture << '_' << bits; cacheFile << '_' << compileArchitecture << '_' << bits;
CUmodule module; CUmodule module;
if (cuModuleLoad(&module, cacheFile.str().c_str()) == CUDA_SUCCESS) if (cuModuleLoad(&module, cacheFile.str().c_str()) == CUDA_SUCCESS)
return module; return module;
...@@ -566,7 +576,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -566,7 +576,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
// If the runtime compiler plugin is available, use it. // If the runtime compiler plugin is available, use it.
if (hasCompilerKernel) { if (hasCompilerKernel) {
string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this); string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+compileArchitecture+" "+options, *this);
// If possible, write the PTX out to a temporary file so we can cache it for later use. // If possible, write the PTX out to a temporary file so we can cache it for later use.
...@@ -596,13 +606,13 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -596,13 +606,13 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
out.close(); out.close();
#ifdef WIN32 #ifdef WIN32
#ifdef _DEBUG #ifdef _DEBUG
string command = compiler+" --ptx -G -g --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile; string command = compiler+" --ptx -G -g --machine "+bits+" -arch=sm_"+compileArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#else #else
string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile; string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+compileArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#endif #endif
res = executeInWindows(command); res = executeInWindows(command);
#else #else
string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\""; string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+compileArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
res = std::system(command.c_str()); res = std::system(command.c_str());
#endif #endif
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2015 Stanford University and the Authors. * * Portions copyright (c) 2015-2021 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -48,6 +48,22 @@ static string getErrorString(nvrtcResult result) { ...@@ -48,6 +48,22 @@ static string getErrorString(nvrtcResult result) {
return nvrtcGetErrorString(result); return nvrtcGetErrorString(result);
} }
/**
 * Create the runtime compiler kernel and record the newest GPU architecture
 * that NVRTC can generate code for.
 *
 * The result is stored in maxSupportedArchitecture as 10*major + minor
 * (e.g. 75 for compute capability 7.5) and is what
 * getMaxSupportedArchitecture() reports.
 *
 * @param name      the name of the kernel, forwarded to CudaCompilerKernel
 * @param platform  the Platform this kernel belongs to, forwarded to CudaCompilerKernel
 */
CudaRuntimeCompilerKernel::CudaRuntimeCompilerKernel(const std::string& name, const Platform& platform) : CudaCompilerKernel(name, platform) {
    // Find the maximum architecture the compiler supports.

#if CUDA_VERSION < 11000
    // CUDA versions before 11 can't query the compiler to see what it supports.

    maxSupportedArchitecture = 75;
#elif CUDA_VERSION < 11010
    // nvrtcGetNumSupportedArchs()/nvrtcGetSupportedArchs() only exist from
    // CUDA 11.2 onward, so the query below would not compile here.
    // CUDA 11.0 can target up to compute capability 8.0.

    maxSupportedArchitecture = 80;
#elif CUDA_VERSION < 11020
    // CUDA 11.1 still lacks the query functions; it added compute capability 8.6.

    maxSupportedArchitecture = 86;
#else
    // CUDA 11.2 and later: ask NVRTC directly.

    int numArchs = 0;
    CHECK_RESULT(nvrtcGetNumSupportedArchs(&numArchs), "Error querying supported architectures");
    vector<int> archs(numArchs);
    CHECK_RESULT(nvrtcGetSupportedArchs(archs.data()), "Error querying supported architectures");

    // NVRTC returns the list sorted in ascending order, so the last entry is the newest.
    maxSupportedArchitecture = archs.back();
#endif
}
string CudaRuntimeCompilerKernel::createModule(const string& source, const string& flags, CudaContext& cu) { string CudaRuntimeCompilerKernel::createModule(const string& source, const string& flags, CudaContext& cu) {
// Split the command line flags into an array of options. // Split the command line flags into an array of options.
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2015 Stanford University and the Authors. * * Portions copyright (c) 2015-2021 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -44,8 +44,7 @@ namespace OpenMM { ...@@ -44,8 +44,7 @@ namespace OpenMM {
*/ */
class OPENMM_EXPORT_CUDACOMPILER CudaRuntimeCompilerKernel : public CudaCompilerKernel { class OPENMM_EXPORT_CUDACOMPILER CudaRuntimeCompilerKernel : public CudaCompilerKernel {
public: public:
CudaRuntimeCompilerKernel(const std::string& name, const Platform& platform) : CudaCompilerKernel(name, platform) { CudaRuntimeCompilerKernel(const std::string& name, const Platform& platform);
}
/** /**
* Compile a kernel to PTX. * Compile a kernel to PTX.
* *
...@@ -54,6 +53,14 @@ public: ...@@ -54,6 +53,14 @@ public:
* @param cu the CudaContext for which the kernel is being compiled * @param cu the CudaContext for which the kernel is being compiled
*/ */
std::string createModule(const std::string& source, const std::string& flags, CudaContext& cu); std::string createModule(const std::string& source, const std::string& flags, CudaContext& cu);
/**
* Get the maximum architecture version the compiler supports.
*/
int getMaxSupportedArchitecture() const {
return maxSupportedArchitecture;
}
private:
int maxSupportedArchitecture;
}; };
} // namespace OpenMM } // namespace OpenMM
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment