Commit 20fbe840 authored by Gustaf Ahdritz's avatar Gustaf Ahdritz
Browse files

Fix kernel installation bug

parent 87f3cd45
import argparse
import ctypes
from datetime import date
import sys
def add_data_args(parser: argparse.ArgumentParser):
......@@ -40,3 +42,59 @@ def add_data_args(parser: argparse.ArgumentParser):
parser.add_argument(
'--release_dates_path', type=str, default=None
)
def get_nvidia_cc():
    """
    Return the Compute Capability of the first GPU installed in the system.

    Returns:
        A 2-tuple ``(cc, error)`` where ``cc`` is a tuple of ints
        ``(major, minor)`` and ``error`` is an error message string.
        Exactly one of the two is ``None``: on success ``error`` is None,
        on failure ``cc`` is None.

    Raises:
        OSError: if no CUDA driver library can be loaded at all.

    Adapted from a script by Jan Schlüter at
    https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
    """
    CUDA_SUCCESS = 0
    libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        # for/else: no library name loaded successfully.
        raise OSError("could not load any of: " + ' '.join(libnames))

    nGpus = ctypes.c_int()
    cc_major = ctypes.c_int()
    cc_minor = ctypes.c_int()
    device = ctypes.c_int()
    error_str = ctypes.c_char_p()

    def _error_message(result):
        # Translate a CUDA driver status code into a human-readable string.
        # NOTE: cuGetErrorString returns a status code and writes the message
        # pointer into `error_str`; the original code decoded the status code
        # itself, which raised AttributeError on every error path.
        cuda.cuGetErrorString(result, ctypes.byref(error_str))
        if error_str.value is None:
            return "unknown CUDA error ({})".format(result)
        return error_str.value.decode()

    result = cuda.cuInit(0)
    if result != CUDA_SUCCESS:
        return None, _error_message(result)

    result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
    if result != CUDA_SUCCESS:
        return None, _error_message(result)

    # BUG FIX: the original referenced `err` here, a name never bound on this
    # path, so a machine with a driver but no device raised NameError instead
    # of returning an error message.
    if nGpus.value < 1:
        return None, "No CUDA-capable devices found"

    result = cuda.cuDeviceGet(ctypes.byref(device), 0)
    if result != CUDA_SUCCESS:
        return None, _error_message(result)

    if cuda.cuDeviceComputeCapability(
        ctypes.byref(cc_major), ctypes.byref(cc_minor), device
    ) != CUDA_SUCCESS:
        return None, "Compute Capability not found"

    return (cc_major.value, cc_minor.value), None
......@@ -18,6 +18,8 @@ import subprocess
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
from scripts.utils import get_nvidia_cc
version_dependent_macros = [
'-DVERSION_GE_1_1',
......@@ -44,11 +46,24 @@ def get_cuda_bare_metal_version(cuda_dir):
return raw_output, bare_metal_major, bare_metal_minor
# Collect the set of compute capabilities to compile kernels for.
# sm_70 (Volta) is always included as the baseline.
compute_capabilities = set()
compute_capabilities.add((7, 0))

# CUDA 11+ toolchains can also target sm_80 (Ampere).
_, bare_metal_major, _ = get_cuda_bare_metal_version(CUDA_HOME)
if int(bare_metal_major) >= 11:
    compute_capabilities.add((8, 0))

# Also target whatever GPU is actually installed, if one can be detected.
compute_capability, _ = get_nvidia_cc()
if compute_capability is not None:
    compute_capabilities.add(compute_capability)

print(compute_capabilities)

# Build the -gencode flags from the capability set.
# NOTE: the original pre-populated cc_flag with sm_70/sm_80 entries before
# unconditionally reassigning it to [] here — that work was dead code and
# has been removed. sorted() makes the flag order deterministic across
# builds (set iteration order is not).
cc_flag = []
for major, minor in sorted(compute_capabilities):
    cc_flag.extend([
        '-gencode',
        f'arch=compute_{major}{minor},code=sm_{major}{minor}',
    ])

extra_cuda_flags += cc_flag
......@@ -96,3 +111,6 @@ setup(
'Topic :: Scientific/Engineering :: Artificial Intelligence',
],
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment