Commit a371be30 authored by Tim Dettmers
Browse files

Added CUDA SETUP instruction generator.

parent 62e16493
...@@ -133,5 +133,8 @@ Bug fixes: ...@@ -133,5 +133,8 @@ Bug fixes:
### 0.35.1 ### 0.35.1
Features:
- Added CUDA instruction generator to fix some installations.
Bug fixes: Bug fixes:
- Fixed a problem where warning messages would be displayed even though everything worked correctly. - Fixed a problem where warning messages would be displayed even though everything worked correctly.
import ctypes as ct import ctypes as ct
import torch
from pathlib import Path from pathlib import Path
from warnings import warn from warnings import warn
class CUDASetup(object): class CUDASetup(object):
_instance = None _instance = None
def __init__(self): def __init__(self):
raise RuntimeError("Call get_instance() instead") raise RuntimeError("Call get_instance() instead")
def generate_instructions(self):
    """Append troubleshooting instructions for a failed CUDA setup to the log.

    Reads ``self.cuda``, ``self.cudart_path``, ``self.cuda_version_string``
    and ``self.cc`` and emits every line through ``self.add_log_entry``.
    Exactly one of three instruction sets is written:

    1. ``self.cuda`` is None: how to make ``libcuda.so`` discoverable.
    2. ``self.cudart_path`` is None: how to locate or install ``libcudart.so``.
    3. Otherwise: how to compile the library from source for the detected
       CUDA version.

    Returns:
        None. The only effect is the sequence of ``add_log_entry`` calls.
    """
    log = self.add_log_entry

    if self.cuda is None:
        log('CUDA SETUP: Problem: The main issue seems to be that the main CUDA library was not detected.')
        log('CUDA SETUP: Solution 1): Your paths are probably not up-to-date. You can update them via: sudo ldconfig.')
        log('CUDA SETUP: Solution 2): If you do not have sudo rights, you can do the following:')
        log('CUDA SETUP: Solution 2a): Find the cuda library via: find / -name libcuda.so 2>/dev/null')
        log('CUDA SETUP: Solution 2b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_2a')
        log('CUDA SETUP: Solution 2c): For a permanent solution add the export from 2b into your .bashrc file, located at ~/.bashrc')
        return

    if self.cudart_path is None:
        log('CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.')
        log('CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be added to the LD_LIBRARY_PATH variable')
        log('CUDA SETUP: Solution 1a): Find the cuda runtime library via: find / -name libcudart.so 2>/dev/null')
        log('CUDA SETUP: Solution 1b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a')
        log('CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc')
        log('CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.')
        # Use the raw-content URL: the github.com/.../blob/... address serves
        # an HTML page, so wget would not fetch a runnable shell script.
        log('CUDA SETUP: Solution 2a): Download CUDA install script: wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh')
        log('CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.')
        log('CUDA SETUP: Solution 2b): For example, "bash cuda_install.sh 113 ~/local/" will download CUDA 11.3 and install into the folder ~/local')
        return

    # Pick the make target for the detected CUDA version. ``target`` stays
    # None when no branch matches (missing version string, or a version this
    # method has no mapping for), so we never emit a command without a target.
    version = self.cuda_version_string
    target = None
    if not version:
        target = None
    elif len(version) < 3:
        target = 'cuda92'
    elif version == '110':
        target = 'cuda110'
    elif version[:2] == '11' and version[2].isdigit() and int(version[2]) > 0:
        target = 'cuda11x'

    # Compute capabilities outside this set get the "_nomatmul" target variant.
    has_cublaslt = self.cc in ("7.5", "8.0", "8.6")
    if target is not None and not has_cublaslt:
        target += '_nomatmul'

    log('CUDA SETUP: Something unexpected happened. Please compile from source:')
    # HTTPS clone works without a GitHub SSH key being configured.
    log('git clone https://github.com/TimDettmers/bitsandbytes.git')
    log('cd bitsandbytes')
    if target is None:
        log(f'CUDA SETUP: No known make target for detected CUDA version: {version}. '
            'Check the Makefile for a target that matches your CUDA installation.')
    else:
        log(f'CUDA_VERSION={version} make {target}')
    log('python setup.py install')
def initialize(self): def initialize(self):
self.cuda_setup_log = [] self.cuda_setup_log = []
self.lib = None
from .cuda_setup.main import evaluate_cuda_setup from .cuda_setup.main import evaluate_cuda_setup
binary_name = evaluate_cuda_setup() binary_name, cudart_path, cuda, cc, cuda_version_string = evaluate_cuda_setup()
self.cudart_path = cudart_path
self.cuda = cuda
self.cc = cc
self.cuda_version_string = cuda_version_string
package_dir = Path(__file__).parent package_dir = Path(__file__).parent
binary_path = package_dir / binary_name binary_path = package_dir / binary_name
try: try:
if not binary_path.exists(): if not binary_path.exists():
self.add_log_entry(f"CUDA SETUP: TODO: compile library for specific version: {binary_name}") self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?")
legacy_binary_name = "libbitsandbytes.so" legacy_binary_name = "libbitsandbytes.so"
self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...") self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
binary_path = package_dir / legacy_binary_name binary_path = package_dir / legacy_binary_name
if not binary_path.exists(): if not binary_path.exists():
self.add_log_entry('CUDA SETUP: CUDA detection failed. Either CUDA driver not installed, CUDA not installed, or you have multiple conflicting CUDA libraries!') self.add_log_entry('')
self.add_log_entry('='*48 + 'ERROR' + '='*37)
self.add_log_entry('CUDA SETUP: CUDA detection failed! Possible reasons:')
self.add_log_entry('1. CUDA driver not installed')
self.add_log_entry('2. CUDA not installed')
self.add_log_entry('3. You have multiple conflicting CUDA libraries')
self.add_log_entry('4. Required library not pre-compiled for this bitsandbytes release!')
self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=113`.') self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=113`.')
self.add_log_entry('='*80)
self.add_log_entry('')
self.generate_instructions()
self.print_log_stack() self.print_log_stack()
raise Exception('CUDA SETUP: Setup Failed!') raise Exception('CUDA SETUP: Setup Failed!')
self.lib = ct.cdll.LoadLibrary(binary_path) self.lib = ct.cdll.LoadLibrary(binary_path)
...@@ -56,6 +112,13 @@ class CUDASetup(object): ...@@ -56,6 +112,13 @@ class CUDASetup(object):
lib = CUDASetup.get_instance().lib lib = CUDASetup.get_instance().lib
try: try:
if lib is None and torch.cuda.is_available():
CUDASetup.get_instance().generate_instructions()
CUDASetup.get_instance().print_log_stack()
raise RuntimeError('''
CUDA Setup failed despite GPU being available. Inspect the CUDA SETUP outputs to fix your environment!
If you cannot find any issues and suspect a bug, please open an issue with details about your environment:
https://github.com/TimDettmers/bitsandbytes/issues''')
lib.cadam32bit_g32 lib.cadam32bit_g32
lib.get_context.restype = ct.c_void_p lib.get_context.restype = ct.c_void_p
lib.get_cusparse.restype = ct.c_void_p lib.get_cusparse.restype = ct.c_void_p
......
...@@ -159,4 +159,4 @@ def evaluate_cuda_setup(): ...@@ -159,4 +159,4 @@ def evaluate_cuda_setup():
binary_name = get_binary_name() binary_name = get_binary_name()
return binary_name return binary_name, cudart_path, cuda, cc, cuda_version_string
...@@ -93,12 +93,8 @@ def test_full_system(): ...@@ -93,12 +93,8 @@ def test_full_system():
# but it does not contain the library directly, so we need to look at a sub-folder # but it does not contain the library directly, so we need to look at a sub-folder
version = "" version = ""
if "CONDA_PREFIX" in os.environ: if "CONDA_PREFIX" in os.environ:
ls_output, err = bnb.utils.execute_and_return( ls_output, err = bnb.utils.execute_and_return(f'ls -l {os.environ["CONDA_PREFIX"]}/lib/libcudart.so')
f'ls -l {os.environ["CONDA_PREFIX"]}/lib/libcudart.so' major, minor, revision = (ls_output.split(" ")[-1].replace("libcudart.so.", "").split("."))
)
major, minor, revision = (
ls_output.split(" ")[-1].replace("libcudart.so.", "").split(".")
)
version = float(f"{major}.{minor}") version = float(f"{major}.{minor}")
if version == "" and "LD_LIBRARY_PATH" in os.environ: if version == "" and "LD_LIBRARY_PATH" in os.environ:
...@@ -114,6 +110,6 @@ def test_full_system(): ...@@ -114,6 +110,6 @@ def test_full_system():
assert version > 0 assert version > 0
binary_name = evaluate_cuda_setup() binary_name, cudart_path, cuda, cc, cuda_version_string = evaluate_cuda_setup()
binary_name = binary_name.replace("libbitsandbytes_cuda", "") binary_name = binary_name.replace("libbitsandbytes_cuda", "")
assert binary_name.startswith(str(version).replace(".", "")) assert binary_name.startswith(str(version).replace(".", ""))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment