Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
bitsandbytes
Commits
3479d02a
Commit
3479d02a
authored
Aug 01, 2022
by
Tim Dettmers
Browse files
Added some more docs and comments.
parent
8bf3e9fa
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
26 deletions
+36
-26
bitsandbytes/cuda_setup.py
bitsandbytes/cuda_setup.py
+33
-26
tests/test_cuda_setup_evaluator.py
tests/test_cuda_setup_evaluator.py
+3
-0
No files found.
bitsandbytes/cuda_setup.py
View file @
3479d02a
...
...
@@ -27,17 +27,24 @@ from .utils import print_err, warn_of_missing_prerequisite, execute_and_return
def
check_cuda_result
(
cuda
,
result_val
):
# 3. Check for CUDA errors
if
result_val
!=
0
:
# TODO: undefined name 'error_str'
error_str
=
ctypes
.
c_char_p
()
cuda
.
cuGetErrorString
(
result_val
,
ctypes
.
byref
(
error_str
))
print
(
"Count not initialize CUDA - failure!"
)
raise
Exception
(
"CUDA exception!"
)
return
result_val
raise
Exception
(
f
"CUDA exception! ERROR:
{
error_str
}
"
)
# taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
def
get_compute_capability
():
libnames
=
(
"libcuda.so"
,
"libcuda.dylib"
,
"cuda.dll"
)
# 1. find libcuda.so library (GPU driver) (/usr/lib)
# init_device -> init variables -> call function by reference
# 2. call extern C function to determine CC
# (https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html)
# 3. Check for CUDA errors
# https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
# 1. find libcuda.so library (GPU driver) (/usr/lib)
libnames
=
(
"libcuda.so"
,)
for
libname
in
libnames
:
try
:
cuda
=
ctypes
.
CDLL
(
libname
)
...
...
@@ -54,31 +61,23 @@ def get_compute_capability():
result
=
ctypes
.
c_int
()
device
=
ctypes
.
c_int
()
# TODO: local variable 'context' is assigned to but never used
context
=
ctypes
.
c_void_p
()
# TODO: local variable 'error_str' is assigned to but never used
error_str
=
ctypes
.
c_char_p
()
result
=
check_cuda_result
(
cuda
,
cuda
.
cuInit
(
0
))
check_cuda_result
(
cuda
,
cuda
.
cuInit
(
0
))
result
=
check_cuda_result
(
cuda
,
cuda
.
cuDeviceGetCount
(
ctypes
.
byref
(
nGpus
)))
check_cuda_result
(
cuda
,
cuda
.
cuDeviceGetCount
(
ctypes
.
byref
(
nGpus
)))
ccs
=
[]
for
i
in
range
(
nGpus
.
value
):
result
=
check_cuda_result
(
cuda
,
cuda
.
cuDeviceGet
(
ctypes
.
byref
(
device
),
i
)
)
result
=
check_cuda_result
(
cuda
,
cuda
.
cuDeviceComputeCapability
(
ctypes
.
byref
(
cc_major
),
ctypes
.
byref
(
cc_minor
),
device
),
)
check_cuda_result
(
cuda
,
cuda
.
cuDeviceGet
(
ctypes
.
byref
(
device
),
i
))
ref_major
=
ctypes
(
cc_major
)
ref_minor
=
ctypes
(
cc_minor
)
# 2. call extern C function to determine CC
check_cuda_result
(
cuda
,
cuda
.
cuDeviceComputeCapability
(
ref_major
,
ref_minor
,
device
))
ccs
.
append
(
f
"
{
cc_major
.
value
}
.
{
cc_minor
.
value
}
"
)
# TODO: handle different compute capabilities; for now, take the max
ccs
.
sort
()
# return
ccs[-1]
return
cc
s
max_cc
=
ccs
[
-
1
]
return
max_
cc
CUDA_RUNTIME_LIB
:
str
=
"libcudart.so"
...
...
@@ -89,6 +88,7 @@ def tokenize_paths(paths: str) -> Set[Path]:
def
resolve_env_variable
(
env_var
):
'''Searches a given environmental library or path for the CUDA runtime library (libcudart.so)'''
paths
:
Set
[
Path
]
=
tokenize_paths
(
env_var
)
non_existent_directories
:
Set
[
Path
]
=
{
...
...
@@ -112,13 +112,16 @@ def resolve_env_variable(env_var):
f
"Found duplicate
{
CUDA_RUNTIME_LIB
}
files:
{
cuda_runtime_libs
}
.."
)
raise
FileNotFoundError
(
err_msg
)
elif
len
(
cuda_runtime_libs
)
==
0
:
return
None
elif
len
(
cuda_runtime_libs
)
==
0
:
return
None
# this is not an error, since other envs can contain CUDA
else
:
return
next
(
iter
(
cuda_runtime_libs
))
# for now just return the first
def
get_cuda_runtime_lib_path
()
->
Union
[
Path
,
None
]:
"""# TODO: add doc-string"""
'''Searches conda installation and environmental paths for a CUDA installation.'''
cuda_runtime_libs
=
[]
# CONDA_PREFIX/lib is the default location for a default conda
# install of pytorch. This location takes priority over all
# other defined variables
if
'CONDA_PREFIX'
in
os
.
environ
:
lib_conda_path
=
f
'
{
os
.
environ
[
"CONDA_PREFIX"
]
}
/lib/'
print
(
lib_conda_path
)
...
...
@@ -126,6 +129,8 @@ def get_cuda_runtime_lib_path() -> Union[Path, None]:
if
len
(
cuda_runtime_libs
)
==
1
:
return
cuda_runtime_libs
[
0
]
# if CONDA_PREFIX does not have the library, search the environment
# (in particular LD_LIBRARY_PATH)
for
var
in
os
.
environ
:
cuda_runtime_libs
.
append
(
resolve_env_variable
(
var
))
...
...
@@ -146,17 +151,19 @@ def evaluate_cuda_setup():
if
not
(
has_gpu
:
=
bool
(
cc
)):
print
(
"WARNING: No GPU detected! Check our CUDA paths. Processing to load CPU-only library..."
"WARNING: No GPU detected! Check
y
our CUDA paths. Processing to load CPU-only library..."
)
return
binary_name
has_cublaslt
=
cc
in
[
"7.5"
,
"8.0"
,
"8.6"
]
# TODO:
# (1)
Model
missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible)
# (1)
CUDA
missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible)
# (2) Multiple CUDA versions installed
cuda_home
=
str
(
Path
(
cuda_path
).
parent
.
parent
)
# we use ls -l instead of nvcc to determine the cuda version
# since most installations will have the libcudart.so installed, but not the compiler
ls_output
,
err
=
execute_and_return
(
f
"ls -l
{
cuda_path
}
"
)
major
,
minor
,
revision
=
ls_output
.
split
(
' '
)[
-
1
].
replace
(
'libcudart.so.'
,
''
).
split
(
'.'
)
cuda_version_string
=
f
"
{
major
}{
minor
}
"
...
...
tests/test_cuda_setup_evaluator.py
View file @
3479d02a
...
...
@@ -92,6 +92,9 @@ def test_get_cuda_runtime_lib_path__non_existent_dir(capsys, tmp_path):
def
test_full_system
():
## this only tests the cuda version and not compute capability
# if CONDA_PREFIX exists, it has priority before all other env variables
# but it does not contain the library directly, so we need to look at a sub-folder
version
=
''
if
'CONDA_PREFIX'
in
os
.
environ
:
ls_output
,
err
=
bnb
.
utils
.
execute_and_return
(
f
'ls -l
{
os
.
environ
[
"CONDA_PREFIX"
]
}
/lib/libcudart.so'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment