Commit 20fbe840 authored by Gustaf Ahdritz's avatar Gustaf Ahdritz
Browse files

Fix kernel installation bug

parent 87f3cd45
import argparse
import ctypes
from datetime import date
import sys
def add_data_args(parser: argparse.ArgumentParser):
......@@ -40,3 +42,59 @@ def add_data_args(parser: argparse.ArgumentParser):
parser.add_argument(
'--release_dates_path', type=str, default=None
)
def get_nvidia_cc():
    """
    Return the Compute Capability of the first GPU installed in the system.

    Returns:
        A 2-tuple ``(cc, error)`` where ``cc`` is a tuple of ints
        ``(major, minor)`` and ``error`` is an error message string.
        Exactly one of the two is ``None``: on success ``error`` is None,
        on failure ``cc`` is None.

    Raises:
        OSError: if no CUDA driver library can be loaded at all.

    Adapted from a script by Jan Schlüter at
    https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
    """
    CUDA_SUCCESS = 0
    libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        # for/else: no library name loaded successfully.
        raise OSError("could not load any of: " + ' '.join(libnames))

    nGpus = ctypes.c_int()
    cc_major = ctypes.c_int()
    cc_minor = ctypes.c_int()
    device = ctypes.c_int()
    error_str = ctypes.c_char_p()

    def _error_message(result):
        # Translate a CUDA driver status code into a human-readable string.
        # NOTE: cuGetErrorString returns a status code and writes the message
        # pointer into `error_str`; the original code decoded the status code
        # itself, which raised AttributeError on every error path.
        cuda.cuGetErrorString(result, ctypes.byref(error_str))
        if error_str.value is None:
            return "unknown CUDA error ({})".format(result)
        return error_str.value.decode()

    result = cuda.cuInit(0)
    if result != CUDA_SUCCESS:
        return None, _error_message(result)

    result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
    if result != CUDA_SUCCESS:
        return None, _error_message(result)

    # BUG FIX: the original referenced `err` here, a name never bound on this
    # path, so a machine with a driver but no device raised NameError instead
    # of returning an error message.
    if nGpus.value < 1:
        return None, "No CUDA-capable devices found"

    result = cuda.cuDeviceGet(ctypes.byref(device), 0)
    if result != CUDA_SUCCESS:
        return None, _error_message(result)

    if cuda.cuDeviceComputeCapability(
        ctypes.byref(cc_major), ctypes.byref(cc_minor), device
    ) != CUDA_SUCCESS:
        return None, "Compute Capability not found"

    return (cc_major.value, cc_minor.value), None
......@@ -18,6 +18,8 @@ import subprocess
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
from scripts.utils import get_nvidia_cc
version_dependent_macros = [
'-DVERSION_GE_1_1',
......@@ -44,11 +46,24 @@ def get_cuda_bare_metal_version(cuda_dir):
return raw_output, bare_metal_major, bare_metal_minor
# Collect the set of compute capabilities to compile kernels for.
# sm_70 (Volta) is always included as the baseline.
compute_capabilities = set()
compute_capabilities.add((7, 0))

# CUDA 11+ toolchains can also target sm_80 (Ampere).
_, bare_metal_major, _ = get_cuda_bare_metal_version(CUDA_HOME)
if int(bare_metal_major) >= 11:
    compute_capabilities.add((8, 0))

# Also target whatever GPU is actually installed, if one can be detected.
compute_capability, _ = get_nvidia_cc()
if compute_capability is not None:
    compute_capabilities.add(compute_capability)

print(compute_capabilities)

# Build the -gencode flags from the capability set.
# NOTE: the original pre-populated cc_flag with sm_70/sm_80 entries before
# unconditionally reassigning it to [] here — that work was dead code and
# has been removed. sorted() makes the flag order deterministic across
# builds (set iteration order is not).
cc_flag = []
for major, minor in sorted(compute_capabilities):
    cc_flag.extend([
        '-gencode',
        f'arch=compute_{major}{minor},code=sm_{major}{minor}',
    ])

extra_cuda_flags += cc_flag
......@@ -96,3 +111,6 @@ setup(
'Topic :: Scientific/Engineering :: Artificial Intelligence',
],
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment