Unverified commit 483239ef, authored by Peter Eastman and committed by GitHub
Browse files

Improve selection of architecture to compile for (#3017)

* Improve selection of architecture to compile for

* Fixed compilation errors on older CUDA versions
parent ebef35a4
......@@ -530,9 +530,10 @@ private:
int contextIndex;
int numAtomBlocks;
int numThreadBlocks;
int gpuArchitecture;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, hasAssignedPosqCharges;
bool isLinkedContext;
std::string compiler, tempDir, cacheDir, gpuArchitecture;
std::string compiler, tempDir, cacheDir;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
std::string defaultOptimizationOptions;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -62,6 +62,10 @@ public:
* @param cu the CudaContext for which the kernel is being compiled
*/
virtual std::string createModule(const std::string& source, const std::string& flags, CudaContext& cu) = 0;
/**
* Get the maximum architecture version the compiler supports.
*/
virtual int getMaxSupportedArchitecture() const = 0;
};
/**
......
......@@ -227,7 +227,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
minor = 3;
}
}
gpuArchitecture = intToString(major)+intToString(minor);
gpuArchitecture = 10*major+minor;
computeCapability = major+0.1*minor;
contextIsValid = true;
......@@ -531,6 +531,16 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
if (!defines.empty())
src << endl;
src << source << endl;
// Determine what architecture to compile for.
string compileArchitecture;
if (hasCompilerKernel) {
int maxCompilerArchitecture = compilerKernel.getAs<CudaCompilerKernel>().getMaxSupportedArchitecture();
compileArchitecture = intToString(min(gpuArchitecture, maxCompilerArchitecture));
}
else
compileArchitecture = intToString(gpuArchitecture);
// See whether we already have PTX for this kernel cached.
......@@ -544,7 +554,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
cacheFile.flags(ios::hex);
for (int i = 0; i < 20; i++)
cacheFile << setw(2) << setfill('0') << (int) hash[i];
cacheFile << '_' << gpuArchitecture << '_' << bits;
cacheFile << '_' << compileArchitecture << '_' << bits;
CUmodule module;
if (cuModuleLoad(&module, cacheFile.str().c_str()) == CUDA_SUCCESS)
return module;
......@@ -566,7 +576,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
// If the runtime compiler plugin is available, use it.
if (hasCompilerKernel) {
string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this);
string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+compileArchitecture+" "+options, *this);
// If possible, write the PTX out to a temporary file so we can cache it for later use.
......@@ -596,13 +606,13 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
out.close();
#ifdef WIN32
#ifdef _DEBUG
string command = compiler+" --ptx -G -g --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
string command = compiler+" --ptx -G -g --machine "+bits+" -arch=sm_"+compileArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#else
string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
string command = compiler+" --ptx -lineinfo --machine "+bits+" -arch=sm_"+compileArchitecture+" -o "+outputFile+" "+options+" "+inputFile+" 2> "+logFile;
#endif
res = executeInWindows(command);
#else
string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+gpuArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
string command = compiler+" --ptx --machine "+bits+" -arch=sm_"+compileArchitecture+" -o \""+outputFile+"\" "+options+" \""+inputFile+"\" 2> \""+logFile+"\"";
res = std::system(command.c_str());
#endif
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2015-2021 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -48,6 +48,22 @@ static string getErrorString(nvrtcResult result) {
return nvrtcGetErrorString(result);
}
/**
 * Create the runtime compiler kernel and determine the newest architecture
 * version the runtime compiler can generate code for.  The result is stored
 * in maxSupportedArchitecture.
 *
 * @param name      the name of the kernel, forwarded to CudaCompilerKernel
 * @param platform  the Platform the kernel belongs to, forwarded to CudaCompilerKernel
 */
CudaRuntimeCompilerKernel::CudaRuntimeCompilerKernel(const std::string& name, const Platform& platform) : CudaCompilerKernel(name, platform) {
    // Find the maximum architecture the compiler supports.

    // Start from the fixed fallback value.  It is kept whenever the compiler
    // cannot be queried or reports no architectures at all.
    maxSupportedArchitecture = 75;
#if CUDA_VERSION >= 11020
    // nvrtcGetNumSupportedArchs() and nvrtcGetSupportedArchs() only exist in
    // CUDA 11.2 and later.  Referencing them when building against CUDA 11.0
    // or 11.1 is a compilation error, so the check must be against 11020
    // rather than 11000.
    int numArchs = 0;
    CHECK_RESULT(nvrtcGetNumSupportedArchs(&numArchs), "Error querying supported architectures");
    if (numArchs > 0) {
        vector<int> archs(numArchs);
        CHECK_RESULT(nvrtcGetSupportedArchs(archs.data()), "Error querying supported architectures");
        // NVRTC documents the returned list as sorted in ascending order, so
        // the last entry is the largest.  The numArchs check above guarantees
        // the vector is not empty, which back() requires.
        maxSupportedArchitecture = archs.back();
    }
#endif
}
string CudaRuntimeCompilerKernel::createModule(const string& source, const string& flags, CudaContext& cu) {
// Split the command line flags into an array of options.
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2015-2021 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -44,8 +44,7 @@ namespace OpenMM {
*/
class OPENMM_EXPORT_CUDACOMPILER CudaRuntimeCompilerKernel : public CudaCompilerKernel {
public:
CudaRuntimeCompilerKernel(const std::string& name, const Platform& platform) : CudaCompilerKernel(name, platform) {
}
CudaRuntimeCompilerKernel(const std::string& name, const Platform& platform);
/**
* Compile a kernel to PTX.
*
......@@ -54,6 +53,14 @@ public:
* @param cu the CudaContext for which the kernel is being compiled
*/
std::string createModule(const std::string& source, const std::string& flags, CudaContext& cu);
/**
* Get the maximum architecture version the compiler supports.
*/
int getMaxSupportedArchitecture() const {
return maxSupportedArchitecture;
}
private:
int maxSupportedArchitecture;
};
} // namespace OpenMM
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment