Unverified Commit 259ad441 authored by Aarni Koskela, committed by GitHub

CUDA setup cleanup (#996)

* Diagnostics: streamline debug printing code

* CUDA setup: Remove unused `backup_paths`

* CUDA setup: DRY OS detection

* CUDA setup: Streamline `manual_override()`

* CUDA setup: Use comment instead of string literal, simplify

* CUDA setup: remove duplicate sort

The "sort compute capabilities" fix from #703 (#527) would actually do nothing due to this.

* CUDA setup: make version number replacement logic more obvious
parent acc7fb37
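
To illustrate the duplicate-sort bullet, a minimal sketch; the capability values are made up, and the stub stands in for the real torch-based implementation:

def get_compute_capabilities():
    # stand-in for the real implementation, which collects one
    # "major.minor" string per visible GPU and returns them sorted
    return sorted(["8.6", "7.5", "8.0"])

# Before this commit, the call site sorted the result a second time:
ccs = get_compute_capabilities()
ccs.sort()    # duplicate sort: the list is already sorted, so this changes nothing
cc = ccs[-1]  # "8.6" -- the highest capability

# After this commit, the call site takes the highest capability directly:
cc = get_compute_capabilities()[-1]
print(cc)  # 8.6

With both sorts in place, one of the two is necessarily redundant; the commit keeps the sort inside get_compute_capabilities() and drops the one at the call site.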
+import glob
 import os
-from os.path import isdir
 import sys
 from warnings import warn
@@ -8,17 +8,9 @@ import torch
 
 HEADER_WIDTH = 60
 
-def find_file_recursive(folder, filename):
-    import glob
-    outs = []
-    try:
-        for ext in ["so", "dll", "dylib"]:
-            out = glob.glob(os.path.join(folder, "**", filename + ext))
-            outs.extend(out)
-    except Exception as e:
-        raise RuntimeError('Error: Something when wrong when trying to find file.') from e
-    return outs
+def find_dynamic_library(folder, filename):
+    for ext in ("so", "dll", "dylib"):
+        yield from glob.glob(os.path.join(folder, "**", filename + ext))
 
 
 def generate_bug_report_information():
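
A quick usage sketch of the new helper: it is a generator, so callers that need a list must materialize it, as generate_bug_report_information() below does. The helper is copied from the diff above; the search path is illustrative:

import glob
import os

def find_dynamic_library(folder, filename):
    for ext in ("so", "dll", "dylib"):
        # filename is a glob pattern; '*cuda*' + 'so' yields '*cuda*so',
        # which matches e.g. 'libcudart.so'
        # (without recursive=True, "**" matches a single path segment, like "*")
        yield from glob.glob(os.path.join(folder, "**", filename + ext))

# Lazy generator: materialize it when a list is needed.
print(list(find_dynamic_library("/usr/local", "*cuda*")))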
@@ -27,40 +19,25 @@ def generate_bug_report_information():
     print_header("")
     print('')
 
-    if 'CONDA_PREFIX' in os.environ:
-        paths = find_file_recursive(os.environ['CONDA_PREFIX'], '*cuda*')
-        print_header("ANACONDA CUDA PATHS")
-        print(paths)
-        print('')
-    if isdir('/usr/local/'):
-        paths = find_file_recursive('/usr/local', '*cuda*')
-        print_header("/usr/local CUDA PATHS")
-        print(paths)
-        print('')
-    if 'CUDA_PATH' in os.environ and isdir(os.environ['CUDA_PATH']):
-        paths = find_file_recursive(os.environ['CUDA_PATH'], '*cuda*')
-        print_header("CUDA PATHS")
-        print(paths)
-        print('')
-
-    if isdir(os.getcwd()):
-        paths = find_file_recursive(os.getcwd(), '*cuda*')
-        print_header("WORKING DIRECTORY CUDA PATHS")
-        print(paths)
-        print('')
-
-    print_header("LD_LIBRARY CUDA PATHS")
-    if 'LD_LIBRARY_PATH' in os.environ:
-        lib_path = os.environ['LD_LIBRARY_PATH'].strip()
-        for path in set(lib_path.split(os.pathsep)):
-            try:
-                if isdir(path):
-                    print_header(f"{path} CUDA PATHS")
-                    paths = find_file_recursive(path, '*cuda*')
-                    print(paths)
-            except Exception as e:
-                print(f'Could not read LD_LIBRARY_PATH: {path} ({e})')
-    print('')
+    path_sources = [
+        ("ANACONDA CUDA PATHS", os.environ.get("CONDA_PREFIX")),
+        ("/usr/local CUDA PATHS", "/usr/local"),
+        ("CUDA PATHS", os.environ.get("CUDA_PATH")),
+        ("WORKING DIRECTORY CUDA PATHS", os.getcwd()),
+    ]
+    try:
+        ld_library_path = os.environ.get("LD_LIBRARY_PATH")
+        if ld_library_path:
+            for path in set(ld_library_path.strip().split(os.pathsep)):
+                path_sources.append((f"LD_LIBRARY_PATH {path} CUDA PATHS", path))
+    except Exception as e:
+        print(f"Could not parse LD_LIBRARY_PATH: {e}")
+
+    for name, path in path_sources:
+        if path and os.path.isdir(path):
+            print_header(name)
+            print(list(find_dynamic_library(path, '*cuda*')))
+            print("")
 
 
 def print_header(
...
@@ -28,19 +28,17 @@ import torch
 
 from .env_vars import get_potentially_lib_path_containing_env_vars
 
-# these are the most common libs names
-# libcudart.so is missing by default for a conda install with PyTorch 2.0 and instead
-# we have libcudart.so.11.0 which causes a lot of errors before
-# not sure if libcudart.so.12.0 exists in pytorch installs, but it does not hurt
-system = platform.system()
-if system == 'Windows':
+if platform.system() == 'Windows':  # Windows
     CUDA_RUNTIME_LIBS = ["nvcuda.dll"]
-else: # Linux or other
-    CUDA_RUNTIME_LIBS = ["libcudart.so", 'libcudart.so.11.0', 'libcudart.so.12.0', 'libcudart.so.12.1', 'libcudart.so.12.2']
-
-# this is a order list of backup paths to search CUDA in, if it cannot be found in the main environmental paths
-backup_paths = []
-backup_paths.append('$CONDA_PREFIX/lib/libcudart.so.11.0')
+    DYNAMIC_LIBRARY_SUFFIX = ".dll"
+else:  # Linux or other
+    # these are the most common libs names
+    # libcudart.so is missing by default for a conda install with PyTorch 2.0 and instead
+    # we have libcudart.so.11.0 which causes a lot of errors before
+    # not sure if libcudart.so.12.0 exists in pytorch installs, but it does not hurt
+    CUDA_RUNTIME_LIBS = ["libcudart.so", "libcudart.so.11.0", "libcudart.so.12.0", "libcudart.so.12.1", "libcudart.so.12.2"]
+    DYNAMIC_LIBRARY_SUFFIX = ".so"
 
 
 class CUDASetup:
     _instance = None
@@ -108,22 +106,30 @@ class CUDASetup:
         self.error = False
 
     def manual_override(self):
-        if torch.cuda.is_available():
-            if 'BNB_CUDA_VERSION' in os.environ:
-                if len(os.environ['BNB_CUDA_VERSION']) > 0:
-                    warn(
-                        f'\n\n{"=" * 80}\n'
-                        'WARNING: Manual override via BNB_CUDA_VERSION env variable detected!\n'
-                        'BNB_CUDA_VERSION=XXX can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n'
-                        'If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n'
-                        'If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n'
-                        'For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64\n'
-                        f'Loading CUDA version: BNB_CUDA_VERSION={os.environ["BNB_CUDA_VERSION"]}'
-                        f'\n{"=" * 80}\n\n'
-                    )
-                    binary_name = self.binary_name.rsplit(".", 1)[0]
-                    suffix = ".so" if os.name != "nt" else ".dll"
-                    self.binary_name = binary_name[:-3] + f'{os.environ["BNB_CUDA_VERSION"]}.{suffix}'
+        if not torch.cuda.is_available():
+            return
+        override_value = os.environ.get('BNB_CUDA_VERSION')
+        if not override_value:
+            return
+
+        binary_name_stem, _, binary_name_ext = self.binary_name.rpartition(".")
+        # `binary_name_stem` will now be e.g. `/foo/bar/libbitsandbytes_cuda118`;
+        # let's remove any trailing numbers:
+        binary_name_stem = binary_name_stem.rstrip("0123456789")
+        # `binary_name_stem` will now be e.g. `/foo/bar/libbitsandbytes_cuda`;
+        # let's tack the new version number and the original extension back on.
+        self.binary_name = f"{binary_name_stem}{override_value}.{binary_name_ext}"
+
+        warn(
+            f'\n\n{"=" * 80}\n'
+            'WARNING: Manual override via BNB_CUDA_VERSION env variable detected!\n'
+            'BNB_CUDA_VERSION=XXX can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n'
+            'If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n'
+            'If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n'
+            'For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64\n'
+            f'Loading: {self.binary_name}'
+            f'\n{"=" * 80}\n\n'
+        )
 
     def run_cuda_setup(self):
         self.initialized = True
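
A worked example of the new stem/extension handling in manual_override(); the file name and override value are illustrative:

binary_name = "libbitsandbytes_cuda118.so"
override_value = "122"  # as if BNB_CUDA_VERSION=122

stem, _, ext = binary_name.rpartition(".")  # ("libbitsandbytes_cuda118", ".", "so")
stem = stem.rstrip("0123456789")            # "libbitsandbytes_cuda"
print(f"{stem}{override_value}.{ext}")      # libbitsandbytes_cuda122.so

Unlike the old fixed-width binary_name[:-3] slice, rstrip("0123456789") copes with version suffixes of any length.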
@@ -140,11 +146,10 @@ class CUDASetup:
 
         package_dir = Path(__file__).parent.parent
         binary_path = package_dir / self.binary_name
-        suffix = ".so" if os.name != "nt" else ".dll"
 
         try:
             if not binary_path.exists():
                 self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?")
-                legacy_binary_name = f"libbitsandbytes_cpu{suffix}"
+                legacy_binary_name = f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
                 self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
                 binary_path = package_dir / legacy_binary_name
                 if not binary_path.exists() or torch.cuda.is_available():
@@ -348,19 +353,18 @@ def get_compute_capabilities():
 
 def evaluate_cuda_setup():
     cuda_setup = CUDASetup.get_instance()
-    suffix = ".so" if os.name != "nt" else ".dll"
     if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
         cuda_setup.add_log_entry('')
         cuda_setup.add_log_entry('='*35 + 'BUG REPORT' + '='*35)
         cuda_setup.add_log_entry(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'),
                                  ('and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues'))
         cuda_setup.add_log_entry('='*80)
-    if not torch.cuda.is_available(): return f'libbitsandbytes_cpu{suffix}', None, None, None
+    if not torch.cuda.is_available():
+        return f'libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}', None, None, None
 
     cudart_path = determine_cuda_runtime_lib_path()
-    ccs = get_compute_capabilities()
-    ccs.sort()
-    cc = ccs[-1]  # we take the highest capability
+    cc = get_compute_capabilities()[-1]  # we take the highest capability
     cuda_version_string = get_cuda_version()
 
     cuda_setup.add_log_entry(f"CUDA SETUP: PyTorch settings found: CUDA_VERSION={cuda_version_string}, Highest Compute Capability: {cc}.")
@@ -380,12 +384,11 @@ def evaluate_cuda_setup():
 
     # we use ls -l instead of nvcc to determine the cuda version
     # since most installations will have the libcudart.so installed, but not the compiler
-    if has_cublaslt:
-        binary_name = f"libbitsandbytes_cuda{cuda_version_string}"
-    else:
-        "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt"
-        binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt"
-    binary_name = f"{binary_name}{suffix}"
+    binary_name = f"libbitsandbytes_cuda{cuda_version_string}"
+    if not has_cublaslt:
+        # if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt
+        binary_name += "_nocublaslt"
+    binary_name = f"{binary_name}{DYNAMIC_LIBRARY_SUFFIX}"
 
     return binary_name, cudart_path, cc, cuda_version_string
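
For illustration, the names this logic produces; the CUDA version string is an example value:

DYNAMIC_LIBRARY_SUFFIX = ".so"  # ".dll" on Windows
cuda_version_string = "118"

for has_cublaslt in (True, False):
    binary_name = f"libbitsandbytes_cuda{cuda_version_string}"
    if not has_cublaslt:  # compute capability < 7.5
        binary_name += "_nocublaslt"
    print(f"{binary_name}{DYNAMIC_LIBRARY_SUFFIX}")
# libbitsandbytes_cuda118.so
# libbitsandbytes_cuda118_nocublaslt.so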