OpenDAS / bitsandbytes · Commits

Commit 8f84674d authored Aug 04, 2022 by Tim Dettmers

Fixed bugs in cuda setup.

Parent: 758c7175

Showing 4 changed files with 19 additions and 12 deletions (+19 −12):

  bitsandbytes/cextension.py            +4 −3
  bitsandbytes/cuda_setup/__init__.py   +2 −0
  bitsandbytes/cuda_setup/main.py       +9 −5
  tests/test_autograd.py                +4 −4
bitsandbytes/cextension.py

@@ -17,12 +17,13 @@ class CUDALibrary_Singleton(object):
         binary_path = package_dir / binary_name
 
         if not binary_path.exists():
-            print(f"TODO: compile library for specific version: {binary_name}")
+            print(f"CUDA_SETUP: TODO: compile library for specific version: {binary_name}")
             legacy_binary_name = "libbitsandbytes.so"
-            print(f"Defaulting to {legacy_binary_name}...")
+            print(f"CUDA_SETUP: Defaulting to {legacy_binary_name}...")
             self.lib = ct.cdll.LoadLibrary(package_dir / legacy_binary_name)
         else:
-            self.lib = ct.cdll.LoadLibrary(package_dir / binary_name)
+            print(f"CUDA_SETUP: Loading binary {binary_path}...")
+            self.lib = ct.cdll.LoadLibrary(binary_path)
 
     @classmethod
     def get_instance(cls):
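The net effect: every setup message is now prefixed with CUDA_SETUP:, and the else branch both logs which shared object it is loading and loads via the already-computed binary_path instead of recomputing package_dir / binary_name. A minimal standalone sketch of the resulting logic, with package_dir and binary_name passed in as parameters since the real values live on the singleton:

```python
import ctypes as ct
from pathlib import Path

def load_bnb_library(package_dir: Path, binary_name: str):
    # Prefer the CUDA-version-specific binary; fall back to the legacy build.
    binary_path = package_dir / binary_name
    if not binary_path.exists():
        legacy_binary_name = "libbitsandbytes.so"
        print(f"CUDA_SETUP: TODO: compile library for specific version: {binary_name}")
        print(f"CUDA_SETUP: Defaulting to {legacy_binary_name}...")
        return ct.cdll.LoadLibrary(str(package_dir / legacy_binary_name))
    # Log exactly which shared object is being loaded.
    print(f"CUDA_SETUP: Loading binary {binary_path}...")
    return ct.cdll.LoadLibrary(str(binary_path))
```

(The str() around the paths is a defensive choice in this sketch; the original passes Path objects to ct.cdll.LoadLibrary directly.)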
bitsandbytes/cuda_setup/__init__.py

+from .paths import CUDA_RUNTIME_LIB, extract_candidate_paths, determine_cuda_runtime_lib_path
+from .main import evaluate_cuda_setup
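These re-exports let callers import the setup entry points from the package itself rather than reaching into its submodules:

```python
# After this commit, the package-level import works:
from bitsandbytes.cuda_setup import evaluate_cuda_setup

# ...and is equivalent to the direct submodule import:
from bitsandbytes.cuda_setup.main import evaluate_cuda_setup
```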
bitsandbytes/cuda_setup/main.py

@@ -47,6 +47,7 @@ def get_compute_capabilities():
         cuda = ctypes.CDLL("libcuda.so")
     except OSError:
         # TODO: shouldn't we error or at least warn here?
+        print('ERROR: libcuda.so not found!')
         return None
 
     nGpus = ctypes.c_int()
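This answers the existing TODO: a failed dlopen of the driver library now prints an error instead of returning None silently. Pulled out as a self-contained sketch:

```python
import ctypes

def load_libcuda():
    # libcuda.so ships with the NVIDIA driver, not the CUDA toolkit; if it
    # cannot be loaded, there is no driver to query for GPUs.
    try:
        return ctypes.CDLL("libcuda.so")
    except OSError:
        print('ERROR: libcuda.so not found!')
        return None
```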
@@ -70,7 +71,7 @@ def get_compute_capabilities():
         )
         ccs.append(f"{cc_major.value}.{cc_minor.value}")
 
-    return ccs.sort()
+    return ccs
 
 
 # def get_compute_capability()-> Union[List[str, ...], None]: # FIXME: error
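The removed line always returned None: list.sort() sorts in place and returns None, so callers never saw the list. The fix returns the list itself (note the sort call is dropped entirely, leaving the list in device-enumeration order):

```python
ccs = ["7.0", "8.6", "7.5"]
print(ccs.sort())                     # None: sort() mutates and returns None
print(ccs)                            # ['7.0', '7.5', '8.6']
print(sorted(["7.0", "8.6", "7.5"]))  # non-mutating alternative
```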
@@ -80,7 +81,8 @@ def get_compute_capability():
     capabilities are downwards compatible. If no GPUs are detected, it returns
     None.
     """
-    if ccs := get_compute_capabilities() is not None:
+    ccs = get_compute_capabilities()
+    if ccs is not None:
         # TODO: handle different compute capabilities; for now, take the max
         return ccs[-1]
     return None
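The removed line is a walrus-operator precedence bug: := binds more loosely than "is not", so ccs was assigned the boolean result of the comparison, and the subsequent ccs[-1] would raise TypeError. A small demonstration:

```python
def get_caps():
    return ["7.5", "8.0"]  # stand-in for get_compute_capabilities()

# Buggy form: parses as ccs := (get_caps() is not None)
if ccs := get_caps() is not None:
    print(type(ccs))  # <class 'bool'> -- so ccs[-1] would raise TypeError

# Fixed form: bind first, then test
ccs = get_caps()
if ccs is not None:
    print(ccs[-1])  # '8.0'
```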
@@ -92,8 +94,7 @@ def evaluate_cuda_setup():
     cc = get_compute_capability()
     binary_name = "libbitsandbytes_cpu.so"
 
-    # FIXME: has_gpu is still unused
-    if not (has_gpu := bool(cc)):
+    if cc == '':
         print(
             "WARNING: No GPU detected! Check your CUDA paths. Processing to load CPU-only library..."
         )
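The has_gpu binding introduced by the parent commit was never read (its own FIXME said so), so the commit reverts to the plain equality check that guards the CPU-only fallback. A minimal sketch with a stubbed cc:

```python
cc = ''  # stand-in for get_compute_capability() when no GPU is visible

if cc == '':
    print(
        "WARNING: No GPU detected! Check your CUDA paths. "
        "Processing to load CPU-only library..."
    )
```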
@@ -115,6 +116,7 @@ def evaluate_cuda_setup():
         ls_output.split(" ")[-1].replace("libcudart.so.", "").split(".")
     )
     cuda_version_string = f"{major}{minor}"
+    print(f'CUDA_SETUP: Detected CUDA version {cuda_version_string}')
 
     def get_binary_name():
         "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so"
@@ -122,6 +124,8 @@ def evaluate_cuda_setup():
         if has_cublaslt:
             return f"{bin_base_name}{cuda_version_string}.so"
         else:
-            return f"{bin_base_name}_nocublaslt.so"
+            return f"{bin_base_name}{cuda_version_string}_nocublaslt.so"
 
+    binary_name = get_binary_name()
+
     return binary_name
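Two fixes in this hunk: the _nocublaslt variant now also carries the CUDA version suffix, and the added binary_name = get_binary_name() call means the computed name actually replaces the CPU default before being returned. A sketch of the selection rule as a pure function; the bin_base_name value here is an assumption for illustration, since the real function closes over variables inside evaluate_cuda_setup():

```python
def get_binary_name(cuda_version_string: str, has_cublaslt: bool) -> str:
    # GPUs below CC 7.5 lack cuBLASLt support and need the _nocublaslt build;
    # both variants are now tagged with the detected CUDA version.
    bin_base_name = "libbitsandbytes_cuda"  # assumed base name
    if has_cublaslt:
        return f"{bin_base_name}{cuda_version_string}.so"
    return f"{bin_base_name}{cuda_version_string}_nocublaslt.so"

print(get_binary_name("110", True))   # libbitsandbytes_cuda110.so
print(get_binary_name("110", False))  # libbitsandbytes_cuda110_nocublaslt.so
```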
tests/test_autograd.py

@@ -351,9 +351,9 @@ def test_matmullt(
             err = torch.abs(out_bnb - out_torch).mean().item()
             # print(f'abs error {err:.4f}')
             idx = torch.isclose(out_bnb, out_torch, atol=0.01, rtol=0.1)
-            assert (idx == 0).sum().item() < n * 0.0175
+            assert (idx == 0).sum().item() <= n * 0.0175
             idx = torch.isclose(out_bnb, out_torch, atol=0.035, rtol=0.2)
-            assert (idx == 0).sum().item() < n * 0.001
+            assert (idx == 0).sum().item() <= n * 0.001
 
         if has_fp16_weights:
             if any(req_grad):

@@ -391,9 +391,9 @@ def test_matmullt(
                 assert torch.abs(gradB2).sum() == 0.0
             idx = torch.isclose(gradB1, gradB2, atol=0.06, rtol=0.3)
-            assert (idx == 0).sum().item() < n * 0.1
+            assert (idx == 0).sum().item() <= n * 0.1
             idx = torch.isclose(gradB1, gradB2, atol=0.10, rtol=0.3)
-            assert (idx == 0).sum().item() < n * 0.02
+            assert (idx == 0).sum().item() <= n * 0.02
             torch.testing.assert_allclose(
                 gradB1, gradB2, atol=0.18, rtol=0.3
             )
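All four assertions switch from a strict < bound to <=, so a run whose mismatch count lands exactly on the threshold (e.g. n * 0.0175 elements) now passes instead of flaking. The pattern being bounded, as a self-contained sketch with simulated data standing in for the real matmul outputs:

```python
import torch

torch.manual_seed(0)
out_torch = torch.randn(64, 64)
out_bnb = out_torch + 0.001 * torch.randn(64, 64)  # simulated quantization error
n = out_torch.numel()

# Count elements that disagree beyond the tolerances...
idx = torch.isclose(out_bnb, out_torch, atol=0.01, rtol=0.1)
mismatches = (idx == 0).sum().item()

# ...and bound that count by a fraction of n. '<=' admits the exact
# boundary case that the old strict '<' rejected.
assert mismatches <= n * 0.0175
```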