Unverified Commit fd723b78 authored by Titus, committed by GitHub

Merge pull request #1041 from akx/cuda-wagh

Rework CUDA/native-library setup and diagnostics
parents ce597c63 79d1cccc
@@ -3,7 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-from . import cuda_setup, research, utils
+from . import research, utils
 from .autograd._functions import (
     MatmulLtState,
     bmm_cublas,
@@ -12,11 +12,8 @@ from .autograd._functions import (
     matmul_cublas,
     mm_cublas,
 )
-from .cextension import COMPILED_WITH_CUDA
 from .nn import modules
-
-if COMPILED_WITH_CUDA:
-    from .optim import adam
+from .optim import adam

 __pdoc__ = {
     "libbitsandbytes": False,
@@ -25,5 +22,3 @@ __pdoc__ = {
 }

 __version__ = "0.44.0.dev"
-
-PACKAGE_GITHUB_URL = "https://github.com/TimDettmers/bitsandbytes"
import glob
import os
import sys
from warnings import warn
import torch
HEADER_WIDTH = 60
def find_dynamic_library(folder, filename):
    for ext in ("so", "dll", "dylib"):
        # recursive=True is required for the `**` pattern to actually descend into subdirectories
        yield from glob.glob(os.path.join(folder, "**", filename + ext), recursive=True)
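# Illustrative only (not in the original): on a typical Linux box,
#     list(find_dynamic_library("/usr/local", "*cuda*"))
# might yield paths such as '/usr/local/cuda/lib64/libcudart.so', depending on the system.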
def generate_bug_report_information():
print_header("")
print_header("BUG REPORT INFORMATION")
print_header("")
print('')
path_sources = [
("ANACONDA CUDA PATHS", os.environ.get("CONDA_PREFIX")),
("/usr/local CUDA PATHS", "/usr/local"),
("CUDA PATHS", os.environ.get("CUDA_PATH")),
("WORKING DIRECTORY CUDA PATHS", os.getcwd()),
]
try:
ld_library_path = os.environ.get("LD_LIBRARY_PATH")
if ld_library_path:
for path in set(ld_library_path.strip().split(os.pathsep)):
path_sources.append((f"LD_LIBRARY_PATH {path} CUDA PATHS", path))
except Exception as e:
print(f"Could not parse LD_LIBRARY_PATH: {e}")
for name, path in path_sources:
if path and os.path.isdir(path):
print_header(name)
print(list(find_dynamic_library(path, '*cuda*')))
print("")
def print_header(
txt: str, width: int = HEADER_WIDTH, filler: str = "+"
) -> None:
txt = f" {txt} " if txt else ""
print(txt.center(width, filler))
def print_debug_info() -> None:
from . import PACKAGE_GITHUB_URL
print(
"\nAbove we output some debug information. Please provide this info when "
f"creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose ...\n"
)
def main():
generate_bug_report_information()
from . import COMPILED_WITH_CUDA
from .cuda_setup.main import get_compute_capabilities
print_header("OTHER")
print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
print(f"COMPUTE_CAPABILITIES_PER_GPU = {get_compute_capabilities()}")
print_header("")
print_header("DEBUG INFO END")
print_header("")
print("Checking that the library is importable and CUDA is callable...")
print("\nWARNING: Please be sure to sanitize sensitive info from any such env vars!\n")
try:
from bitsandbytes.optim import Adam
p = torch.nn.Parameter(torch.rand(10, 10).cuda())
a = torch.rand(10, 10).cuda()
p1 = p.data.sum().item()
adam = Adam([p])
out = a * p
loss = out.sum()
loss.backward()
adam.step()
p2 = p.data.sum().item()
assert p1 != p2
print("SUCCESS!")
print("Installation was successful!")
except ImportError:
print()
warn(
f"WARNING: {__package__} is currently running as CPU-only!\n"
"Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
f"If you think that this is so erroneously,\nplease report an issue!"
)
print_debug_info()
except Exception as e:
print(e)
print_debug_info()
sys.exit(1)
if __name__ == "__main__": if __name__ == "__main__":
from bitsandbytes.diagnostics.main import main
main() main()
"""
extract factors the build is dependent on:
[X] compute capability
[ ] TODO: Q - What if we have multiple GPUs of different makes?
- CUDA version
- Software:
- CPU-only: only CPU quantization functions (no optimizer, no matrix multiply)
- CuBLAS-LT: full-build 8-bit optimizer
- no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)
evaluation:
- if paths faulty, return meaningful error
- else:
- determine CUDA version
- determine capabilities
- based on that set the default path
"""
 import ctypes as ct
-from warnings import warn
+import logging
+import os
+from pathlib import Path

 import torch

-from bitsandbytes.cuda_setup.main import CUDASetup
+from bitsandbytes.consts import DYNAMIC_LIBRARY_SUFFIX, PACKAGE_DIR
+from bitsandbytes.cuda_specs import CUDASpecs, get_cuda_specs
+
+logger = logging.getLogger(__name__)
def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path:
"""
Get the disk path to the CUDA BNB native library specified by the
given CUDA specs, taking into account the `BNB_CUDA_VERSION` override environment variable.
The library is not guaranteed to exist at the returned path.
"""
library_name = f"libbitsandbytes_cuda{cuda_specs.cuda_version_string}"
if not cuda_specs.has_cublaslt:
# if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt
library_name += "_nocublaslt"
library_name = f"{library_name}{DYNAMIC_LIBRARY_SUFFIX}"
override_value = os.environ.get("BNB_CUDA_VERSION")
if override_value:
library_name_stem, _, library_name_ext = library_name.rpartition(".")
# `library_name_stem` will now be e.g. `libbitsandbytes_cuda118`;
# let's remove any trailing numbers:
library_name_stem = library_name_stem.rstrip("0123456789")
# `library_name_stem` will now be e.g. `libbitsandbytes_cuda`;
# let's tack the new version number and the original extension back on.
library_name = f"{library_name_stem}{override_value}.{library_name_ext}"
logger.warning(
f"WARNING: BNB_CUDA_VERSION={override_value} environment variable detected; loading {library_name}.\n"
"This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n"
"If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n"
"If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n"
"For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64\n"
)
return PACKAGE_DIR / library_name
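# Illustrative sketch (not part of the library): how the resolution above plays out
# for hypothetical CUDA 11.8 specs on Linux.
#
#     specs = CUDASpecs(highest_compute_capability=(8, 6),
#                       cuda_version_string="118",
#                       cuda_version_tuple=(11, 8))
#     get_cuda_bnb_library_path(specs)   # -> PACKAGE_DIR / "libbitsandbytes_cuda118.so"
#     # ...and with BNB_CUDA_VERSION=122 set in the environment:
#     get_cuda_bnb_library_path(specs)   # -> PACKAGE_DIR / "libbitsandbytes_cuda122.so"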
class BNBNativeLibrary:
_lib: ct.CDLL
compiled_with_cuda = False
def __init__(self, lib: ct.CDLL):
self._lib = lib
def __getattr__(self, item):
return getattr(self._lib, item)
class CudaBNBNativeLibrary(BNBNativeLibrary):
compiled_with_cuda = True
def __init__(self, lib: ct.CDLL):
super().__init__(lib)
lib.get_context.restype = ct.c_void_p
lib.get_cusparse.restype = ct.c_void_p
lib.cget_managed_ptr.restype = ct.c_void_p
def get_native_library() -> BNBNativeLibrary:
binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
cuda_specs = get_cuda_specs()
if cuda_specs:
cuda_binary_path = get_cuda_bnb_library_path(cuda_specs)
if cuda_binary_path.exists():
binary_path = cuda_binary_path
else:
logger.warning("Could not find the bitsandbytes CUDA binary at %r", cuda_binary_path)
logger.debug(f"Loading bitsandbytes native library from: {binary_path}")
dll = ct.cdll.LoadLibrary(str(binary_path))
if hasattr(dll, "get_context"): # only a CUDA-built library exposes this
return CudaBNBNativeLibrary(dll)
logger.warning(
"The installed version of bitsandbytes was compiled without GPU support. "
"8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable."
)
return BNBNativeLibrary(dll)
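# Minimal usage sketch (an assumption; it mirrors the module-level `try` block below):
#
#     lib = get_native_library()
#     if lib.compiled_with_cuda:
#         ...  # CUDA symbols such as get_context are available on `lib`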
-setup = CUDASetup.get_instance()
-if setup.initialized != True:
-    setup.run_cuda_setup()
-lib = setup.lib
 try:
-    if lib is None and torch.cuda.is_available():
-        CUDASetup.get_instance().generate_instructions()
-        CUDASetup.get_instance().print_log_stack()
-        raise RuntimeError('''
-CUDA Setup failed despite GPU being available. Please run the following command to get more information:
-
-python -m bitsandbytes
-
-Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
-to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
-and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues''')
-    _ = lib.cadam32bit_grad_fp32  # raises AttributeError if the library could not be found -> COMPILED_WITH_CUDA=False
-    lib.get_context.restype = ct.c_void_p
-    lib.get_cusparse.restype = ct.c_void_p
-    lib.cget_managed_ptr.restype = ct.c_void_p
-    COMPILED_WITH_CUDA = True
-except AttributeError as ex:
-    warn("The installed version of bitsandbytes was compiled without GPU support. "
-         "8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.")
-    COMPILED_WITH_CUDA = False
-    print(str(ex))
-
-# print the setup details after checking for errors so we do not print twice
-# if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
-#     setup.print_log_stack()
+    lib = get_native_library()
+except Exception as e:
+    lib = None
+    logger.error(f"Could not load bitsandbytes native library: {e}", exc_info=True)
+    if torch.cuda.is_available():
+        logger.warning(
+            """
+CUDA Setup failed despite CUDA being available. Please run the following command to get more information:
+
+python -m bitsandbytes
+
+Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
+to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
+and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues
+"""
+        )
from pathlib import Path
import platform
DYNAMIC_LIBRARY_SUFFIX = {
"Darwin": ".dylib",
"Linux": ".so",
"Windows": ".dll",
}.get(platform.system(), ".so")
PACKAGE_DIR = Path(__file__).parent
PACKAGE_GITHUB_URL = "https://github.com/TimDettmers/bitsandbytes"
NONPYTORCH_DOC_URL = "https://github.com/TimDettmers/bitsandbytes/blob/main/docs/source/nonpytorchcuda.mdx"
import os
from typing import Dict
def to_be_ignored(env_var: str, value: str) -> bool:
ignorable = {
"PWD", # PWD: this is how the shell keeps track of the current working dir
"OLDPWD",
"SSH_AUTH_SOCK", # SSH stuff, therefore unrelated
"SSH_TTY",
"GOOGLE_VM_CONFIG_LOCK_FILE", # GCP: requires elevated permissions, causing problems in VMs and Jupyter notebooks
"HOME", # Linux shell default
"TMUX", # Terminal Multiplexer
"XDG_DATA_DIRS", # XDG: Desktop environment stuff
"XDG_GREETER_DATA_DIR", # XDG: Desktop environment stuff
"XDG_RUNTIME_DIR",
"MAIL", # something related to emails
"SHELL", # binary for currently invoked shell
"DBUS_SESSION_BUS_ADDRESS", # hardware related
"PATH", # this is for finding binaries, not libraries
"LESSOPEN", # related to the `less` command
"LESSCLOSE",
"_", # current Python interpreter
}
return env_var in ignorable
def might_contain_a_path(candidate: str) -> bool:
return os.sep in candidate
def is_active_conda_env(env_var: str) -> bool:
return "CONDA_PREFIX" == env_var
def is_other_conda_env_var(env_var: str) -> bool:
return "CONDA" in env_var
def is_relevant_candidate_env_var(env_var: str, value: str) -> bool:
return is_active_conda_env(env_var) or (
might_contain_a_path(value) and not
is_other_conda_env_var(env_var) and not
to_be_ignored(env_var, value)
)
def get_potentially_lib_path_containing_env_vars() -> Dict[str, str]:
return {
env_var: value
for env_var, value in os.environ.items()
if is_relevant_candidate_env_var(env_var, value)
}
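# Illustrative only (not in the original): the returned dict is a filtered copy of os.environ,
# e.g. {"CONDA_PREFIX": "/opt/conda", "LD_LIBRARY_PATH": "/usr/local/cuda/lib64"}.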
"""
extract factors the build is dependent on:
[X] compute capability
[ ] TODO: Q - What if we have multiple GPUs of different makes?
- CUDA version
- Software:
- CPU-only: only CPU quantization functions (no optimizer, no matrix multiply)
- CuBLAS-LT: full-build 8-bit optimizer
- no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)
evaluation:
- if paths faulty, return meaningful error
- else:
- determine CUDA version
- determine capabilities
- based on that set the default path
"""
import ctypes as ct
import errno
import os
from pathlib import Path
import platform
from typing import Set, Union
from warnings import warn
import torch
from .env_vars import get_potentially_lib_path_containing_env_vars
DYNAMIC_LIBRARY_SUFFIX = { "Darwin": ".dylib", "Windows": ".dll", "Linux": ".so"}.get(platform.system(), ".so")
if platform.system() == "Windows": # Windows
CUDA_RUNTIME_LIBS = ["cudart64_110.dll", "cudart64_12.dll"]
else:  # Linux or other
    # These are the most common CUDA runtime library names.
    # libcudart.so is missing by default in a conda install with PyTorch 2.0; instead
    # we have libcudart.so.11.0, which caused a lot of errors before.
    # Not sure whether libcudart.so.12.0 exists in PyTorch installs, but it does not hurt.
CUDA_RUNTIME_LIBS = ["libcudart.so", "libcudart.so.11.0", "libcudart.so.12.0", "libcudart.so.12.1", "libcudart.so.12.2"]
class CUDASetup:
_instance = None
def __init__(self):
raise RuntimeError("Call get_instance() instead")
def generate_instructions(self):
if getattr(self, 'error', False): return
print(self.error)
self.error = True
if not self.cuda_available:
self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA library was not detected or CUDA not installed.')
self.add_log_entry('CUDA SETUP: Solution 1): Your paths are probably not up-to-date. You can update them via: sudo ldconfig.')
self.add_log_entry('CUDA SETUP: Solution 2): If you do not have sudo rights, you can do the following:')
self.add_log_entry('CUDA SETUP: Solution 2a): Find the cuda library via: find / -name libcuda.so 2>/dev/null')
self.add_log_entry('CUDA SETUP: Solution 2b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_2a')
self.add_log_entry('CUDA SETUP: Solution 2c): For a permanent solution add the export from 2b into your .bashrc file, located at ~/.bashrc')
self.add_log_entry('CUDA SETUP: Solution 3): For a missing CUDA runtime library (libcudart.so), use `find / -name libcudart.so* and follow with step (2b)')
return
if self.cudart_path is None:
self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.')
self.add_log_entry('CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be added to the LD_LIBRARY_PATH variable')
self.add_log_entry('CUDA SETUP: Solution 1a): Find the cuda runtime library via: find / -name libcudart.so 2>/dev/null')
self.add_log_entry('CUDA SETUP: Solution 1b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a')
self.add_log_entry('CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc')
self.add_log_entry('CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.')
self.add_log_entry('CUDA SETUP: Solution 2a): Download CUDA install script: wget https://raw.githubusercontent.com/TimDettmers/bitsandbytes/main/cuda_install.sh')
self.add_log_entry('CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.')
self.add_log_entry('CUDA SETUP: Solution 2b): For example, "bash cuda_install.sh 113 ~/local/" will download CUDA 11.3 and install into the folder ~/local')
return
make_cmd = f'CUDA_VERSION={self.cuda_version_string}'
if len(self.cuda_version_string) < 3:
make_cmd += ' make cuda92'
elif self.cuda_version_string == '110':
make_cmd += ' make cuda110'
elif self.cuda_version_string[:2] == '11' and int(self.cuda_version_string[2]) > 0:
make_cmd += ' make cuda11x'
elif self.cuda_version_string[:2] == '12' and 1 >= int(self.cuda_version_string[2]) >= 0:
make_cmd += ' make cuda12x'
elif self.cuda_version_string == '100':
self.add_log_entry('CUDA SETUP: CUDA 10.0 not supported. Please use a different CUDA version.')
self.add_log_entry('CUDA SETUP: Before you try again running bitsandbytes, make sure old CUDA 10.0 versions are uninstalled and removed from $LD_LIBRARY_PATH variables.')
return
has_cublaslt = is_cublasLt_compatible(self.cc)
if not has_cublaslt:
make_cmd += '_nomatmul'
self.add_log_entry('CUDA SETUP: Something unexpected happened. Please compile from source:')
self.add_log_entry('git clone https://github.com/TimDettmers/bitsandbytes.git')
self.add_log_entry('cd bitsandbytes')
self.add_log_entry(make_cmd)
self.add_log_entry('python setup.py install')
def initialize(self):
if not getattr(self, 'initialized', False):
self.has_printed = False
self.lib = None
self.initialized = False
self.error = False
def manual_override(self):
if not torch.cuda.is_available():
return
override_value = os.environ.get('BNB_CUDA_VERSION')
if not override_value:
return
binary_name_stem, _, binary_name_ext = self.binary_name.rpartition(".")
# `binary_name_stem` will now be e.g. `/foo/bar/libbitsandbytes_cuda118`;
# let's remove any trailing numbers:
binary_name_stem = binary_name_stem.rstrip("0123456789")
# `binary_name_stem` will now be e.g. `/foo/bar/libbitsandbytes_cuda`;
# let's tack the new version number and the original extension back on.
self.binary_name = f"{binary_name_stem}{override_value}.{binary_name_ext}"
warn(
f'\n\n{"=" * 80}\n'
'WARNING: Manual override via BNB_CUDA_VERSION env variable detected!\n'
'BNB_CUDA_VERSION=XXX can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n'
'If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n'
'If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n'
'For example, by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir>/lib64\n'
f'Loading: {self.binary_name}'
f'\n{"=" * 80}\n\n'
)
def run_cuda_setup(self):
self.initialized = True
self.cuda_setup_log = []
binary_name, cudart_path, cc, cuda_version_string = evaluate_cuda_setup()
self.cudart_path = cudart_path
self.cuda_available = torch.cuda.is_available()
self.cc = cc
self.cuda_version_string = cuda_version_string
self.binary_name = binary_name
self.manual_override()
package_dir = Path(__file__).parent.parent
binary_path = package_dir / self.binary_name
try:
if not binary_path.exists():
self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?")
legacy_binary_name = f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
binary_path = package_dir / legacy_binary_name
if not binary_path.exists() or torch.cuda.is_available():
self.add_log_entry('')
self.add_log_entry('='*48 + 'ERROR' + '='*37)
self.add_log_entry('CUDA SETUP: CUDA detection failed! Possible reasons:')
self.add_log_entry('1. You need to manually override the PyTorch CUDA version. Please see: '
    'https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md')
self.add_log_entry('2. CUDA driver not installed')
self.add_log_entry('3. CUDA not installed')
self.add_log_entry('4. You have multiple conflicting CUDA libraries')
self.add_log_entry('5. Required library not pre-compiled for this bitsandbytes release!')
self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=118`.')
self.add_log_entry('CUDA SETUP: The CUDA version for the compile might depend on your conda install. Inspect CUDA version via `conda list | grep cuda`.')
self.add_log_entry('='*80)
self.add_log_entry('')
self.generate_instructions()
raise Exception('CUDA SETUP: Setup Failed!')
self.lib = ct.cdll.LoadLibrary(str(binary_path))
else:
self.add_log_entry(f"CUDA SETUP: Loading binary {binary_path!s}...")
self.lib = ct.cdll.LoadLibrary(str(binary_path))
except Exception as ex:
self.add_log_entry(str(ex))
def add_log_entry(self, msg, is_warning=False):
self.cuda_setup_log.append((msg, is_warning))
def print_log_stack(self):
for msg, is_warning in self.cuda_setup_log:
if is_warning:
warn(msg)
else:
print(msg)
@classmethod
def get_instance(cls):
if cls._instance is None:
cls._instance = cls.__new__(cls)
cls._instance.initialize()
return cls._instance
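# Usage sketch (illustrative): the singleton is always reached via the classmethod.
#
#     setup = CUDASetup.get_instance()
#     setup.add_log_entry("CUDA SETUP: example log line")
#     setup.print_log_stack()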
def is_cublasLt_compatible(cc):
has_cublaslt = False
if cc is not None:
cc_major, cc_minor = cc.split('.')
if int(cc_major) < 7 or (int(cc_major) == 7 and int(cc_minor) < 5):
CUDASetup.get_instance().add_log_entry("WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU! \
If you run into issues with 8-bit matmul, you can try 4-bit quantization: https://huggingface.co/blog/4bit-transformers-bitsandbytes", is_warning=True)
else:
has_cublaslt = True
return has_cublaslt
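# For illustration (not in the original): is_cublasLt_compatible("7.0") logs the
# warning above and returns False, is_cublasLt_compatible("7.5") returns True,
# and is_cublasLt_compatible(None) returns False.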
def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]:
return {Path(ld_path) for ld_path in paths_list_candidate.split(os.pathsep) if ld_path}
def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
existent_directories: Set[Path] = set()
for path in candidate_paths:
try:
if path.exists():
existent_directories.add(path)
except PermissionError:
# Handle the PermissionError first as it is a subtype of OSError
# https://docs.python.org/3/library/exceptions.html#exception-hierarchy
pass
except OSError as exc:
if exc.errno != errno.ENAMETOOLONG:
raise exc
non_existent_directories: Set[Path] = candidate_paths - existent_directories
if non_existent_directories:
CUDASetup.get_instance().add_log_entry(
f"The following directories listed in your path were found to be non-existent: {non_existent_directories}",
is_warning=False,
)
return existent_directories
def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]:
paths = set()
for libname in CUDA_RUNTIME_LIBS:
for path in candidate_paths:
try:
if (path / libname).is_file():
paths.add(path / libname)
except PermissionError:
pass
return paths
def resolve_paths_list(paths_list_candidate: str) -> Set[Path]:
"""
Searches a given environmental var for the CUDA runtime library,
i.e. `libcudart.so`.
"""
return remove_non_existent_dirs(extract_candidate_paths(paths_list_candidate))
def find_cuda_lib_in(paths_list_candidate: str) -> Set[Path]:
return get_cuda_runtime_lib_paths(
resolve_paths_list(paths_list_candidate)
)
def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
if len(results_paths) > 1:
warning_msg = (
    f"Found duplicate {CUDA_RUNTIME_LIBS} files: {results_paths}. "
    f"We select the PyTorch default libcudart.so, which is {torch.version.cuda}, "
    "but this might mismatch with the CUDA version that is needed for bitsandbytes. "
    "To override this behavior set the BNB_CUDA_VERSION=<version string, e.g. 122> environmental variable. "
    "For example, if you want to use CUDA version 122: "
    "BNB_CUDA_VERSION=122 python ... "
    "OR set the environmental variable in your .bashrc: export BNB_CUDA_VERSION=122. "
    "In the case of a manual override, make sure you set the LD_LIBRARY_PATH, e.g. "
    "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.2")
CUDASetup.get_instance().add_log_entry(warning_msg, is_warning=True)
def determine_cuda_runtime_lib_path() -> Union[Path, None]:
"""
Searches for CUDA installations, in the following order of priority:
1. active conda env
2. LD_LIBRARY_PATH
3. any other env vars, while ignoring those that
- are known to be unrelated (see `bnb.cuda_setup.env_vars.to_be_ignored`)
- don't contain the path separator `/`
If multiple libraries are found in part 3, we optimistically try one,
while giving a warning message.
"""
candidate_env_vars = get_potentially_lib_path_containing_env_vars()
cuda_runtime_libs = set()
if "CONDA_PREFIX" in candidate_env_vars:
conda_libs_path = Path(candidate_env_vars["CONDA_PREFIX"]) / "lib"
conda_cuda_libs = find_cuda_lib_in(str(conda_libs_path))
warn_in_case_of_duplicates(conda_cuda_libs)
if conda_cuda_libs:
cuda_runtime_libs.update(conda_cuda_libs)
CUDASetup.get_instance().add_log_entry(f'{candidate_env_vars["CONDA_PREFIX"]} did not contain '
f'{CUDA_RUNTIME_LIBS} as expected! Searching further paths...', is_warning=True)
if "LD_LIBRARY_PATH" in candidate_env_vars:
lib_ld_cuda_libs = find_cuda_lib_in(candidate_env_vars["LD_LIBRARY_PATH"])
if lib_ld_cuda_libs:
cuda_runtime_libs.update(lib_ld_cuda_libs)
warn_in_case_of_duplicates(lib_ld_cuda_libs)
CUDASetup.get_instance().add_log_entry(f'{candidate_env_vars["LD_LIBRARY_PATH"]} did not contain '
f'{CUDA_RUNTIME_LIBS} as expected! Searching further paths...', is_warning=True)
remaining_candidate_env_vars = {
env_var: value for env_var, value in candidate_env_vars.items()
if env_var not in {"CONDA_PREFIX", "LD_LIBRARY_PATH"}
}
cuda_runtime_libs = set()
for env_var, value in remaining_candidate_env_vars.items():
cuda_runtime_libs.update(find_cuda_lib_in(value))
if len(cuda_runtime_libs) == 0:
CUDASetup.get_instance().add_log_entry('CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...')
cuda_runtime_libs.update(find_cuda_lib_in('/usr/local/cuda/lib64'))
warn_in_case_of_duplicates(cuda_runtime_libs)
cuda_setup = CUDASetup.get_instance()
cuda_setup.add_log_entry(f'DEBUG: Possible options found for libcudart.so: {cuda_runtime_libs}')
return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else None
# https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
def get_cuda_version():
major, minor = map(int, torch.version.cuda.split("."))
if major < 11:
CUDASetup.get_instance().add_log_entry('CUDA SETUP: CUDA versions lower than 11 are currently not supported for LLM.int8(). You will only be able to use 8-bit optimizers and quantization routines!')
return f'{major}{minor}'
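# Example (illustrative): with torch.version.cuda == "11.8" this returns "118",
# the version tag used in the prebuilt binary names.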
def get_compute_capabilities():
ccs = []
for i in range(torch.cuda.device_count()):
cc_major, cc_minor = torch.cuda.get_device_capability(torch.cuda.device(i))
ccs.append(f"{cc_major}.{cc_minor}")
ccs.sort(key=lambda v: tuple(map(int, str(v).split("."))))
return ccs
def evaluate_cuda_setup():
cuda_setup = CUDASetup.get_instance()
if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
cuda_setup.add_log_entry('')
cuda_setup.add_log_entry('='*35 + 'BUG REPORT' + '='*35)
cuda_setup.add_log_entry('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'
    'and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
cuda_setup.add_log_entry('='*80)
if not torch.cuda.is_available():
return f'libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}', None, None, None
cudart_path = determine_cuda_runtime_lib_path()
cc = get_compute_capabilities()[-1] # we take the highest capability
cuda_version_string = get_cuda_version()
cuda_setup.add_log_entry(f"CUDA SETUP: PyTorch settings found: CUDA_VERSION={cuda_version_string}, Highest Compute Capability: {cc}.")
cuda_setup.add_log_entry(
"CUDA SETUP: To manually override the PyTorch CUDA version please see:"
"https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md"
)
# 7.5 is the minimum CC for cublaslt
has_cublaslt = is_cublasLt_compatible(cc)
# TODO:
# (1) CUDA missing cases (no CUDA installed, but the CUDA driver is present, i.e. nvidia-smi is accessible)
# (2) Multiple CUDA versions installed

# we use ls -l instead of nvcc to determine the CUDA version,
# since most installations will have libcudart.so installed, but not the compiler
binary_name = f"libbitsandbytes_cuda{cuda_version_string}"
if not has_cublaslt:
# if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt
binary_name += "_nocublaslt"
binary_name = f"{binary_name}{DYNAMIC_LIBRARY_SUFFIX}"
return binary_name, cudart_path, cc, cuda_version_string
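# Illustrative result on a hypothetical Linux box with CUDA 11.8 and a CC 8.6 GPU:
#     evaluate_cuda_setup()
#     # -> ("libbitsandbytes_cuda118.so", PosixPath("/usr/local/cuda/lib64/libcudart.so"), "8.6", "118")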
import dataclasses
from typing import List, Optional, Tuple
import torch
@dataclasses.dataclass(frozen=True)
class CUDASpecs:
highest_compute_capability: Tuple[int, int]
cuda_version_string: str
cuda_version_tuple: Tuple[int, int]
@property
def has_cublaslt(self) -> bool:
return self.highest_compute_capability >= (7, 5)
def get_compute_capabilities() -> List[Tuple[int, int]]:
return sorted(torch.cuda.get_device_capability(torch.cuda.device(i)) for i in range(torch.cuda.device_count()))
def get_cuda_version_tuple() -> Tuple[int, int]:
# https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
major, minor = map(int, torch.version.cuda.split("."))
return major, minor
def get_cuda_version_string() -> str:
major, minor = get_cuda_version_tuple()
return f"{major}{minor}"
def get_cuda_specs() -> Optional[CUDASpecs]:
if not torch.cuda.is_available():
return None
return CUDASpecs(
highest_compute_capability=(get_compute_capabilities()[-1]),
cuda_version_string=(get_cuda_version_string()),
cuda_version_tuple=get_cuda_version_tuple(),
)
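# Usage sketch (an assumption; requires a CUDA-enabled PyTorch, otherwise None is returned):
#
#     specs = get_cuda_specs()
#     if specs is not None:
#         print(specs.cuda_version_string, specs.has_cublaslt)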
import logging
import os
from pathlib import Path
from typing import Dict, Iterable, Iterator
import torch
from bitsandbytes.cextension import get_cuda_bnb_library_path
from bitsandbytes.consts import NONPYTORCH_DOC_URL
from bitsandbytes.cuda_specs import CUDASpecs
from bitsandbytes.diagnostics.utils import print_dedented
CUDART_PATH_PREFERRED_ENVVARS = ("CONDA_PREFIX", "LD_LIBRARY_PATH")
CUDART_PATH_IGNORED_ENVVARS = {
"DBUS_SESSION_BUS_ADDRESS", # hardware related
"GOOGLE_VM_CONFIG_LOCK_FILE", # GCP: requires elevated permissions, causing problems in VMs and Jupyter notebooks
"HOME", # Linux shell default
"LESSCLOSE",
"LESSOPEN", # related to the `less` command
"MAIL", # something related to emails
"OLDPWD",
"PATH", # this is for finding binaries, not libraries
"PWD", # PWD: this is how the shell keeps track of the current working dir
"SHELL", # binary for currently invoked shell
"SSH_AUTH_SOCK", # SSH stuff, therefore unrelated
"SSH_TTY",
"TMUX", # Terminal Multiplexer
"XDG_DATA_DIRS", # XDG: Desktop environment stuff
"XDG_GREETER_DATA_DIR", # XDG: Desktop environment stuff
"XDG_RUNTIME_DIR",
"_", # current Python interpreter
}
CUDA_RUNTIME_LIB_PATTERNS = (
"cudart64*.dll", # Windows
"libcudart*.so*", # libcudart.so, libcudart.so.11.0, libcudart.so.12.0, libcudart.so.12.1, libcudart.so.12.2 etc.
"nvcuda*.dll", # Windows
)
logger = logging.getLogger(__name__)
def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Iterable[Path]:
for dir_string in paths_list_candidate.split(os.pathsep):
if not dir_string:
continue
if os.sep not in dir_string:
continue
try:
dir = Path(dir_string)
try:
if not dir.exists():
logger.warning(f"The directory listed in your path is found to be non-existent: {dir}")
continue
except OSError: # Assume an esoteric error trying to poke at the directory
pass
for lib_pattern in CUDA_RUNTIME_LIB_PATTERNS:
for pth in dir.glob(lib_pattern):
if pth.is_file():
yield pth
except PermissionError:
pass
def is_relevant_candidate_env_var(env_var: str, value: str) -> bool:
return (
env_var in CUDART_PATH_PREFERRED_ENVVARS # is a preferred location
or (
os.sep in value # might contain a path
and env_var not in CUDART_PATH_IGNORED_ENVVARS # not ignored
and "CONDA" not in env_var # not another conda envvar
and "BASH_FUNC" not in env_var # not a bash function defined via envvar
and "\n" not in value # likely e.g. a script or something?
)
)
def get_potentially_lib_path_containing_env_vars() -> Dict[str, str]:
return {env_var: value for env_var, value in os.environ.items() if is_relevant_candidate_env_var(env_var, value)}
def find_cudart_libraries() -> Iterator[Path]:
"""
Searches for CUDA installations, in the following order of priority:
1. active conda env
2. LD_LIBRARY_PATH
3. any other env vars, while ignoring those that
- are known to be unrelated
- don't contain the path separator `/`
If multiple libraries are found in part 3, we optimistically try one,
while giving a warning message.
"""
candidate_env_vars = get_potentially_lib_path_containing_env_vars()
for envvar in CUDART_PATH_PREFERRED_ENVVARS:
if envvar in candidate_env_vars:
directory = candidate_env_vars[envvar]
yield from find_cuda_libraries_in_path_list(directory)
candidate_env_vars.pop(envvar)
for env_var, value in candidate_env_vars.items():
yield from find_cuda_libraries_in_path_list(value)
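# Usage sketch (illustrative): the function is a generator, so iterate it to inspect
# the candidates.
#
#     for pth in find_cudart_libraries():
#         print(f"candidate CUDA runtime: {pth}")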
def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
print(
f"PyTorch settings found: CUDA_VERSION={cuda_specs.cuda_version_string}, "
f"Highest Compute Capability: {cuda_specs.highest_compute_capability}.",
)
binary_path = get_cuda_bnb_library_path(cuda_specs)
if not binary_path.exists():
print_dedented(
f"""
Library not found: {binary_path}. Maybe you need to compile it from source?
If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`,
for example, `make CUDA_VERSION=113`.
The CUDA version for the compile might depend on your conda install, if using conda.
Inspect CUDA version via `conda list | grep cuda`.
"""
)
cuda_major, cuda_minor = cuda_specs.cuda_version_tuple
if cuda_major < 11:
print_dedented(
"""
WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8().
You will only be able to use 8-bit optimizers and quantization routines!
"""
)
print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}")
# 7.5 is the minimum CC for cublaslt
if not cuda_specs.has_cublaslt:
print_dedented(
"""
WARNING: Compute capability < 7.5 detected! Only slow 8-bit matmul is supported for your GPU!
If you run into issues with 8-bit matmul, you can try 4-bit quantization:
https://huggingface.co/blog/4bit-transformers-bitsandbytes
""",
)
# TODO:
# (1) CUDA missing cases (no CUDA installed, but the CUDA driver is present, i.e. nvidia-smi is accessible)
# (2) Multiple CUDA versions installed
def print_cuda_runtime_diagnostics() -> None:
cudart_paths = list(find_cudart_libraries())
if not cudart_paths:
print("CUDA SETUP: WARNING! CUDA runtime files not found in any environmental path.")
elif len(cudart_paths) > 1:
print_dedented(
f"""
Found duplicate CUDA runtime files (see below).
We select the PyTorch default CUDA runtime, which is {torch.version.cuda},
but this might mismatch with the CUDA version that is needed for bitsandbytes.
To override this behavior set the `BNB_CUDA_VERSION=<version string, e.g. 122>` environmental variable.
For example, if you want to use the CUDA version 122,
BNB_CUDA_VERSION=122 python ...
OR set the environmental variable in your .bashrc:
export BNB_CUDA_VERSION=122
In the case of a manual override, make sure you set LD_LIBRARY_PATH, e.g.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.2
"""
)
for pth in cudart_paths:
print(f"* Found CUDA runtime at: {pth}")
import sys
import traceback
import torch
from bitsandbytes.consts import PACKAGE_GITHUB_URL
from bitsandbytes.cuda_specs import get_cuda_specs
from bitsandbytes.diagnostics.cuda import (
print_cuda_diagnostics,
print_cuda_runtime_diagnostics,
)
from bitsandbytes.diagnostics.utils import print_dedented, print_header
def sanity_check():
from bitsandbytes.cextension import lib
if lib is None:
print_dedented(
"""
Couldn't load the bitsandbytes library, likely due to missing binaries.
Please ensure bitsandbytes is properly installed.
For source installations, compile the binaries with `cmake -DCOMPUTE_BACKEND=cuda -S .`.
See the documentation for more details if needed.
Trying a simple check anyway, but this will likely fail...
"""
)
from bitsandbytes.optim import Adam
p = torch.nn.Parameter(torch.rand(10, 10).cuda())
a = torch.rand(10, 10).cuda()
p1 = p.data.sum().item()
adam = Adam([p])
out = a * p
loss = out.sum()
loss.backward()
adam.step()
p2 = p.data.sum().item()
assert p1 != p2
def main():
print_header("")
print_header("BUG REPORT INFORMATION")
print_header("")
print_header("OTHER")
cuda_specs = get_cuda_specs()
print("CUDA specs:", cuda_specs)
if not torch.cuda.is_available():
print("Torch says CUDA is not available. Possible reasons:")
print("1. CUDA driver not installed")
print("2. CUDA not installed")
print("3. You have multiple conflicting CUDA libraries")
if cuda_specs:
print_cuda_diagnostics(cuda_specs)
print_cuda_runtime_diagnostics()
print_header("")
print_header("DEBUG INFO END")
print_header("")
print("Checking that the library is importable and CUDA is callable...")
try:
sanity_check()
print("SUCCESS!")
print("Installation was successful!")
return
except ImportError:
print(
f"WARNING: {__package__} is currently running as CPU-only!\n"
"Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
f"If you think that this is so erroneously,\nplease report an issue!"
)
except Exception:
traceback.print_exc()
print_dedented(
f"""
Above we output some debug information.
Please provide this info when creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose
WARNING: Please be sure to sanitize sensitive info from the output before posting it.
"""
)
sys.exit(1)
import textwrap
HEADER_WIDTH = 60
def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "+") -> None:
txt = f" {txt} " if txt else ""
print(txt.center(width, filler))
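# Example (illustrative): with the default width of 60,
#     print_header("")      # prints a solid line of 60 '+' characters
#     print_header("OTHER") # prints ' OTHER ' centered in '+' padding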
def print_dedented(text):
    print(textwrap.dedent(text).strip())
@@ -14,7 +14,7 @@ from torch import Tensor

 from bitsandbytes.utils import pack_dict_to_tensor, unpack_tensor_to_dict

-from .cextension import COMPILED_WITH_CUDA, lib
+from .cextension import lib

 # math.prod not compatible with python < 3.8
@@ -23,7 +23,7 @@ def prod(iterable):

 name2qmap = {}

-if COMPILED_WITH_CUDA:
+if lib and lib.compiled_with_cuda:
     """C FUNCTIONS FOR OPTIMIZERS"""
     str2optimizer32bit = {
         "adam": (
@@ -3,8 +3,6 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.

-from bitsandbytes.cextension import COMPILED_WITH_CUDA
-
 from .adagrad import Adagrad, Adagrad8bit, Adagrad32bit
 from .adam import Adam, Adam8bit, Adam32bit, PagedAdam, PagedAdam8bit, PagedAdam32bit
 from .adamw import (
-import os
-from pathlib import Path
+import pytest

-import torch
+from bitsandbytes.cextension import get_cuda_bnb_library_path
+from bitsandbytes.cuda_specs import CUDASpecs

-# hardcoded test. Not good, but a sanity check for now
-# TODO: improve this
-def test_manual_override(requires_cuda):
-    manual_cuda_path = str(Path('/mmfs1/home/dettmers/data/local/cuda-12.2'))
-
-    pytorch_version = torch.version.cuda.replace('.', '')
-
-    assert pytorch_version != 122  # TODO: this will never be true...
-
-    os.environ['CUDA_HOME'] = '{manual_cuda_path}'
-    os.environ['BNB_CUDA_VERSION'] = '122'
-    # assert str(manual_cuda_path) in os.environ['LD_LIBRARY_PATH']
-    import bitsandbytes as bnb
-    loaded_lib = bnb.cuda_setup.main.CUDASetup.get_instance().binary_name
-    # assert loaded_lib == 'libbitsandbytes_cuda122.so'
+
+@pytest.fixture
+def cuda120_spec() -> CUDASpecs:
+    return CUDASpecs(
+        cuda_version_string="120",
+        highest_compute_capability=(8, 6),
+        cuda_version_tuple=(12, 0),
+    )
+
+
+@pytest.fixture
+def cuda111_noblas_spec() -> CUDASpecs:
+    return CUDASpecs(
+        cuda_version_string="111",
+        highest_compute_capability=(7, 2),
+        cuda_version_tuple=(11, 1),
+    )
+
+
+def test_get_cuda_bnb_library_path(monkeypatch, cuda120_spec):
+    monkeypatch.delenv("BNB_CUDA_VERSION", raising=False)
+    assert get_cuda_bnb_library_path(cuda120_spec).stem == "libbitsandbytes_cuda120"
+
+
+def test_get_cuda_bnb_library_path_override(monkeypatch, cuda120_spec, caplog):
+    monkeypatch.setenv("BNB_CUDA_VERSION", "110")
+    assert get_cuda_bnb_library_path(cuda120_spec).stem == "libbitsandbytes_cuda110"
+    assert "BNB_CUDA_VERSION" in caplog.text  # did we get the warning?
+
+
+def test_get_cuda_bnb_library_path_nocublaslt(monkeypatch, cuda111_noblas_spec):
+    monkeypatch.delenv("BNB_CUDA_VERSION", raising=False)
+    assert (
+        get_cuda_bnb_library_path(cuda111_noblas_spec).stem
+        == "libbitsandbytes_cuda111_nocublaslt"
+    )