Unverified Commit e651e8ed authored by Won-Kyu Park, committed by GitHub

fix library detection, enable Windows (#873)



* fix library loading
Signed-off-by: Won-Kyu Park <wkpark@gmail.com>

* fixed library loading

* use os.pathsep

* use glob(), search CUDA_PATH

* call find_file_recursive() without ext

---------
Signed-off-by: Won-Kyu Park <wkpark@gmail.com>
Co-authored-by: James Wyatt <Jamezo97@gmail.com>
parent 94c7f2c5
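
At a glance, the patch replaces the shell `find`-based lookup (which does not exist on Windows) with a pure-Python `glob` search covering `.so`, `.dll`, and `.dylib`, adds `CUDA_PATH` to the searched locations, and splits PATH-like variables with `os.pathsep` instead of a hard-coded `:`. A minimal, self-contained sketch of the idea (function and variable names here are illustrative, not the exact identifiers from the patch):

import glob
import os

def find_libraries(folder: str, pattern: str) -> list:
    """Recursively collect shared libraries matching `pattern` under `folder`."""
    matches = []
    for ext in ("so", "dll", "dylib"):  # Linux, Windows, macOS
        # recursive=True is what lets "**" descend into subdirectories
        matches.extend(glob.glob(os.path.join(folder, "**", pattern + ext), recursive=True))
    return matches

if __name__ == "__main__":
    # PATH-like variables are ':'-separated on POSIX but ';'-separated on Windows
    for directory in os.environ.get("LD_LIBRARY_PATH", "").split(os.pathsep):
        if directory and os.path.isdir(directory):
            print(directory, find_libraries(directory, "*cuda*"))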
@@ -11,34 +11,18 @@ import torch
 HEADER_WIDTH = 60
 
-def execute_and_return(command_string: str) -> Tuple[str, str]:
-    def _decode(subprocess_err_out_tuple):
-        return tuple(
-            to_decode.decode("UTF-8").strip()
-            for to_decode in subprocess_err_out_tuple
-        )
-
-    def execute_and_return_decoded_std_streams(command_string):
-        return _decode(
-            subprocess.Popen(
-                shlex.split(command_string),
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-            ).communicate()
-        )
-
-    std_out, std_err = execute_and_return_decoded_std_streams(command_string)
-    return std_out, std_err
-
 def find_file_recursive(folder, filename):
-    folder = shlex.quote(folder)
-    filename = shlex.quote(filename)
-    cmd = f'find {folder} -name {filename}'
-    out, err = execute_and_return(cmd)
-    if len(err) > 0:
-        raise RuntimeError('Something when wrong when trying to find file. Maybe you do not have a linux system?')
+    import glob
+    outs = []
+    try:
+        for ext in ["so", "dll", "dylib"]:
+            out = glob.glob(os.path.join(folder, "**", filename + ext), recursive=True)  # recursive=True lets "**" match subdirectories
+            outs.extend(out)
+    except Exception as e:
+        raise RuntimeError(f'Error: Something went wrong when trying to find file: {e}')
 
-    return out
+    return outs
 
 def generate_bug_report_information():
@@ -48,18 +32,23 @@ def generate_bug_report_information():
     print('')
 
     if 'CONDA_PREFIX' in os.environ:
-        paths = find_file_recursive(os.environ['CONDA_PREFIX'], '*cuda*so')
+        paths = find_file_recursive(os.environ['CONDA_PREFIX'], '*cuda*')
         print_header("ANACONDA CUDA PATHS")
         print(paths)
         print('')
 
     if isdir('/usr/local/'):
-        paths = find_file_recursive('/usr/local', '*cuda*so')
+        paths = find_file_recursive('/usr/local', '*cuda*')
         print_header("/usr/local CUDA PATHS")
         print(paths)
         print('')
 
+    if 'CUDA_PATH' in os.environ and isdir(os.environ['CUDA_PATH']):
+        paths = find_file_recursive(os.environ['CUDA_PATH'], '*cuda*')
+        print_header("CUDA PATHS")
+        print(paths)
+        print('')
+
     if isdir(os.getcwd()):
-        paths = find_file_recursive(os.getcwd(), '*cuda*so')
+        paths = find_file_recursive(os.getcwd(), '*cuda*')
         print_header("WORKING DIRECTORY CUDA PATHS")
         print(paths)
         print('')
@@ -67,11 +56,11 @@ def generate_bug_report_information():
     print_header("LD_LIBRARY CUDA PATHS")
     if 'LD_LIBRARY_PATH' in os.environ:
         lib_path = os.environ['LD_LIBRARY_PATH'].strip()
-        for path in set(lib_path.split(':')):
+        for path in set(lib_path.split(os.pathsep)):
             try:
                 if isdir(path):
                     print_header(f"{path} CUDA PATHS")
-                    paths = find_file_recursive(path, '*cuda*so')
+                    paths = find_file_recursive(path, '*cuda*')
                     print(paths)
             except:
                 print(f'Could not read LD_LIBRARY_PATH: {path}')
@@ -26,7 +26,7 @@ def to_be_ignored(env_var: str, value: str) -> bool:
 def might_contain_a_path(candidate: str) -> bool:
-    return "/" in candidate
+    return os.sep in candidate
 
 
 def is_active_conda_env(env_var: str) -> bool:
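
A side note on the two separators involved in this patch, since they are easy to conflate: `os.sep` separates components within a single path, while `os.pathsep` separates multiple paths in PATH-like variables (the `extract_candidate_paths` change further down uses the latter). A small illustration, with hypothetical paths:

import os

# os.sep is "/" on POSIX and "\\" on Windows: the path-component separator
print(os.sep in "/usr/local/cuda")       # True on POSIX
print(os.sep in r"C:\CUDA\v12.1\bin")    # True on Windows

# os.pathsep is ":" on POSIX and ";" on Windows: the list-of-paths separator
print("/opt/cuda/lib64:/usr/lib".split(os.pathsep))  # two entries on POSIX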
@@ -19,6 +19,7 @@ evaluation:
 import ctypes as ct
 import os
 import errno
+import platform
 import torch
 from warnings import warn
 from itertools import product
@@ -31,7 +32,11 @@ from .env_vars import get_potentially_lib_path_containing_env_vars
 # libcudart.so is missing by default for a conda install with PyTorch 2.0 and instead
 # we have libcudart.so.11.0 which causes a lot of errors before
 # not sure if libcudart.so.12.0 exists in pytorch installs, but it does not hurt
-CUDA_RUNTIME_LIBS: list = ["libcudart.so", 'libcudart.so.11.0', 'libcudart.so.12.0', 'libcudart.so.12.1', 'libcudart.so.12.2']
+system = platform.system()
+if system == 'Windows':
+    CUDA_RUNTIME_LIBS: list = ["nvcuda.dll"]
+else:  # Linux or other
+    CUDA_RUNTIME_LIBS: list = ["libcudart.so", 'libcudart.so.11.0', 'libcudart.so.12.0', 'libcudart.so.12.1', 'libcudart.so.12.2']
 
 # this is an ordered list of backup paths to search for CUDA, if it cannot be found in the main environment paths
 backup_paths = []
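
On Windows there is no `libcudart.so`; the NVIDIA driver ships `nvcuda.dll` instead, so the candidate list is switched on `platform.system()`. A hedged sketch of probing such a candidate list with `ctypes` (the real setup code walks candidate directories itself; this version simply asks the OS loader):

import ctypes
import platform

if platform.system() == "Windows":
    candidates = ["nvcuda.dll"]
else:
    candidates = ["libcudart.so", "libcudart.so.11.0", "libcudart.so.12.0"]

runtime = None
for name in candidates:
    try:
        runtime = ctypes.CDLL(name)  # let the OS loader resolve the name
        break
    except OSError:
        continue

print("CUDA runtime found:" if runtime else "no CUDA runtime found", runtime)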
@@ -114,7 +119,9 @@ class CUDASetup:
                  'For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64\n'
                  f'Loading CUDA version: BNB_CUDA_VERSION={os.environ["BNB_CUDA_VERSION"]}'
                  f'\n{"="*80}\n\n'))
-        self.binary_name = self.binary_name[:-6] + f'{os.environ["BNB_CUDA_VERSION"]}.so'
+        binary_name = self.binary_name.rsplit(".", 1)[0]
+        suffix = ".so" if os.name != "nt" else ".dll"
+        self.binary_name = binary_name[:-3] + f'{os.environ["BNB_CUDA_VERSION"]}{suffix}'  # suffix already carries the leading dot
 
     def run_cuda_setup(self):
         self.initialized = True
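
The override now strips the extension with `rsplit` and the three-digit CUDA version with `[:-3]` before splicing in `BNB_CUDA_VERSION` and a platform-dependent suffix. A worked example of the string surgery (values hypothetical):

binary_name = "libbitsandbytes_cuda117.so"   # name chosen by evaluate_cuda_setup()
stem = binary_name.rsplit(".", 1)[0]         # "libbitsandbytes_cuda117"
suffix = ".so"                               # would be ".dll" on Windows
new_name = stem[:-3] + "122" + suffix        # drop "117", splice in the override
assert new_name == "libbitsandbytes_cuda122.so"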
@@ -131,10 +138,11 @@ class CUDASetup:
         package_dir = Path(__file__).parent.parent
         binary_path = package_dir / self.binary_name
+        suffix = ".so" if os.name != "nt" else ".dll"
 
         try:
             if not binary_path.exists():
                 self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?")
-                legacy_binary_name = "libbitsandbytes_cpu.so"
+                legacy_binary_name = f"libbitsandbytes_cpu{suffix}"
                 self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...")
                 binary_path = package_dir / legacy_binary_name
                 if not binary_path.exists() or torch.cuda.is_available():
@@ -153,10 +161,10 @@ class CUDASetup:
                     self.add_log_entry('')
                     self.generate_instructions()
                     raise Exception('CUDA SETUP: Setup Failed!')
-                self.lib = ct.cdll.LoadLibrary(binary_path)
+                self.lib = ct.cdll.LoadLibrary(str(binary_path))
             else:
-                self.add_log_entry(f"CUDA SETUP: Loading binary {binary_path}...")
-                self.lib = ct.cdll.LoadLibrary(binary_path)
+                self.add_log_entry(f"CUDA SETUP: Loading binary {binary_path!s}...")
+                self.lib = ct.cdll.LoadLibrary(str(binary_path))
         except Exception as ex:
             self.add_log_entry(str(ex))
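
The `str(...)` wrappers matter because `binary_path` is a `pathlib.Path`: `ctypes` only gained reliable `os.PathLike` support in more recent Python versions, and passing a `Path` object can raise a `TypeError`, notably on Windows. A minimal sketch, with a hypothetical library path:

import ctypes as ct
from pathlib import Path

binary_path = Path("bitsandbytes") / "libbitsandbytes_cpu.so"  # hypothetical location
lib = ct.cdll.LoadLibrary(str(binary_path))  # explicit str() works on every version and OS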
@@ -190,7 +198,7 @@ def is_cublasLt_compatible(cc):
     return has_cublaslt
 
 def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]:
-    return {Path(ld_path) for ld_path in paths_list_candidate.split(":") if ld_path}
+    return {Path(ld_path) for ld_path in paths_list_candidate.split(os.pathsep) if ld_path}
 
 def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
@@ -336,13 +344,14 @@ def get_compute_capabilities():
 def evaluate_cuda_setup():
     cuda_setup = CUDASetup.get_instance()
+    suffix = ".so" if os.name != "nt" else ".dll"
     if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
         cuda_setup.add_log_entry('')
         cuda_setup.add_log_entry('='*35 + 'BUG REPORT' + '='*35)
         cuda_setup.add_log_entry(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'),
                                  ('and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues'))
         cuda_setup.add_log_entry('='*80)
-    if not torch.cuda.is_available(): return 'libbitsandbytes_cpu.so', None, None, None
+    if not torch.cuda.is_available(): return f'libbitsandbytes_cpu{suffix}', None, None, None
 
     cudart_path = determine_cuda_runtime_lib_path()
     ccs = get_compute_capabilities()
@@ -366,9 +375,11 @@ def evaluate_cuda_setup():
     # since most installations will have the libcudart.so installed, but not the compiler
     if has_cublaslt:
-        binary_name = f"libbitsandbytes_cuda{cuda_version_string}.so"
+        binary_name = f"libbitsandbytes_cuda{cuda_version_string}"
     else:
-        "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so"
-        binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt.so"
+        "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt"
+        binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt"
+    binary_name = f"{binary_name}{suffix}"
 
     return binary_name, cudart_path, cc, cuda_version_string
@@ -9,6 +9,7 @@ from setuptools import find_packages, setup
 libs = list(glob.glob("./bitsandbytes/libbitsandbytes*.so"))
+libs += list(glob.glob("./bitsandbytes/libbitsandbytes*.dll"))
 libs = [os.path.basename(p) for p in libs]
 print("libs:", libs)
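
With `.dll` files added to the glob, whatever binaries were actually built get picked up at packaging time. A hedged sketch of how such a list is commonly fed to setuptools via `package_data` (whether this setup.py uses exactly this keyword is not visible in the hunk):

import glob
import os
from setuptools import find_packages, setup

libs = glob.glob("./bitsandbytes/libbitsandbytes*.so")
libs += glob.glob("./bitsandbytes/libbitsandbytes*.dll")
libs = [os.path.basename(p) for p in libs]

setup(
    name="bitsandbytes",                   # illustrative; real metadata omitted
    packages=find_packages(),
    package_data={"bitsandbytes": libs},   # ship the prebuilt binaries in the wheel
)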