OpenDAS / bitsandbytes · Commit 4870580f

Authored Jan 07, 2024 by Tim Dettmers
Parent: 3e706031

    Fixed bnb input in setup.py. Bumped version for release.

Showing 8 changed files with 85 additions and 48 deletions (+85 −48):
| File                               | Additions | Deletions |
|------------------------------------|-----------|-----------|
| CHANGELOG.md                       | +15       | −0        |
| bitsandbytes/__init__.py           | +1        | −1        |
| bitsandbytes/functional.py         | +1        | −1        |
| deploy.sh                          | +42       | −19       |
| setup.py                           | +1        | −3        |
| tests/test_cuda_setup_evaluator.py | +4        | −3        |
| tests/test_functional.py           | +17       | −17       |
| tests/test_modules.py              | +4        | −4        |
CHANGELOG.md

```diff
@@ -327,3 +327,18 @@ Bug fixes:
 - Fixed an issue where 4-bit serialization would fail for layers without double quantization #868. Thank you, @poedator
 - Fixed an issue where calling .to() or .cuda() on a 4-bit layer twice would result in an error #867. Thank you, @jph00
+
+### 0.42.0
+
+Features:
+ - 4-bit serialization now supported. This enables 4-bit load/store. Thank you @poedator #753
+ - the bitsandbytes library now has a version attribute: `bitsandbytes.__version__` @rasbt #710
+
+Bug fixes:
+ - Fixed bugs in dynamic exponent data type creation. Thank you @RossM, @KohakuBlueleaf, @ArrowM #659 #227 #262 #152
+ - Fixed an issue where 4-bit serialization would fail for layers without double quantization #868. Thank you, @poedator
+ - Fixed an issue where calling .to() or .cuda() on a 4-bit layer twice would result in an error #867. Thank you, @jph00
+ - Fixed a bug where a missing access permission in a path searched for CUDA would lead to an error @osma #677
+ - Fixed a bug where the GOOGLE_VM_CONFIG_LOCK_FILE variable could cause errors in colab environments @akrentsel @xaptronic #715 #883 #622
+ - Fixed a bug where kgetColRowStats (LLM.int8()) would fail for certain dimensions @LucQueen #905
+ - Fixed a bug where the adjusted regular Embedding layer was not available via bnb.nn.Embedding @neel04 #563
+ - Fixed added missing scipy requirement @dulalbert #525
```
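One of the 0.42.0 entries above adds a version attribute to the package. A minimal sketch of reading it (assuming 0.42.0 or later is installed):

```python
# Minimal sketch: reading the version attribute added in 0.42.0.
import bitsandbytes as bnb

print(bnb.__version__)         # e.g. "0.42.0"
print(bnb.PACKAGE_GITHUB_URL)  # constant defined alongside it in __init__.py
```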
bitsandbytes/__init__.py

```diff
@@ -24,6 +24,6 @@ __pdoc__ = {
     "optim.optimizer.MockArgs": False,
 }
 
-__version__ = "0.41.3.post1"
+__version__ = "0.42.0"
 
 PACKAGE_GITHUB_URL = "https://github.com/TimDettmers/bitsandbytes"
```
bitsandbytes/functional.py

```diff
@@ -9,7 +9,6 @@ import random
 import torch
 import itertools
 import math
-from scipy.stats import norm
 import numpy as np
 
 from functools import reduce  # Required in Python 3
@@ -235,6 +234,7 @@ def create_linear_map(signed=True, total_bits=8, add_zero=True):
     return torch.Tensor(values[:l].tolist() + [0]*gap + values[l:].tolist())
 
 
 def create_normal_map(offset=0.9677083, use_extra_value=True):
+    from scipy.stats import norm
 
     if use_extra_value:
         # one more positive value, this is an asymmetric type
```
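The two hunks above move `from scipy.stats import norm` from module scope into `create_normal_map`, so importing bitsandbytes no longer requires SciPy; it is only needed when the normal code book is actually built. A sketch of the pattern (the function name and quantile spacing here are illustrative, not the library's):

```python
# Sketch of the deferred-import pattern applied in this commit: the
# SciPy dependency is pulled in on first call, not at module import.
def build_normal_codebook_sketch(num_values=8):
    from scipy.stats import norm  # lazy import, mirroring create_normal_map

    # norm.ppf maps cumulative probabilities to standard-normal quantiles;
    # evenly spaced probabilities yield a normal-distributed code book.
    probs = [(i + 0.5) / num_values for i in range(num_values)]
    return [norm.ppf(p) for p in probs]

print(build_normal_codebook_sketch())
```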
deploy.sh

```diff
@@ -17,7 +17,7 @@ rm -rf dist build
 make cleaneggs
 make cleanlibs
-make clean
+rm -rf build/*
 export CUDA_HOME=
 export CUDA_VERSION=
 make cpuonly CUDA_VERSION="CPU"
@@ -28,7 +28,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.0
 make cuda110 CUDA_VERSION=110
@@ -38,7 +38,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.1
 make cuda11x CUDA_VERSION=111
@@ -48,7 +48,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.4
 make cuda11x CUDA_VERSION=114
@@ -58,7 +58,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.5
 make cuda11x CUDA_VERSION=115
@@ -68,7 +68,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.7
 make cuda11x CUDA_VERSION=117
@@ -78,7 +78,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.8
 make cuda118 CUDA_VERSION=118
@@ -88,7 +88,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-12.0
 make cuda12x CUDA_VERSION=120
@@ -98,7 +98,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-12.1
 make cuda12x CUDA_VERSION=121
@@ -108,7 +108,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-12.2
 make cuda12x CUDA_VERSION=122
@@ -118,8 +118,21 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
+export CUDA_HOME=$BASE_PATH/cuda-12.3
+make cuda12x CUDA_VERSION=123
+if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123.so" ]; then
+    # Control will enter here if $DIRECTORY doesn't exist.
+    echo "Compilation unsuccessful!" 1>&2
+    exit 64
+fi
+
 ############################# START NO CUBLASLT #############################################
 # binaries without 8-bit matmul support START HERE
 # ###########################################################################################
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.0
 make cuda110_nomatmul CUDA_VERSION=110
@@ -130,7 +143,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110_nocublaslt.so" ]; then
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.1
 make cuda11x_nomatmul CUDA_VERSION=111
@@ -140,7 +153,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111_nocublaslt.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.4
 make cuda11x_nomatmul CUDA_VERSION=114
@@ -150,7 +163,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114_nocublaslt.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.5
 make cuda11x_nomatmul CUDA_VERSION=115
@@ -160,7 +173,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.7
 make cuda11x_nomatmul CUDA_VERSION=117
@@ -170,7 +183,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-11.8
 make cuda118_nomatmul CUDA_VERSION=118
@@ -180,7 +193,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118_nocublaslt.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-12.0
 make cuda12x_nomatmul CUDA_VERSION=120
@@ -190,7 +203,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120_nocublaslt.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-12.1
 make cuda12x_nomatmul CUDA_VERSION=121
@@ -200,7 +213,7 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then
     exit 64
 fi
-make clean
+rm -rf build/*
 export CUDA_HOME=$BASE_PATH/cuda-12.2
 make cuda12x_nomatmul CUDA_VERSION=122
@@ -210,5 +223,15 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so" ]; then
     exit 64
 fi
+rm -rf build/*
+export CUDA_HOME=$BASE_PATH/cuda-12.3
+make cuda12x_nomatmul CUDA_VERSION=123
+if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123_nocublaslt.so" ]; then
+    # Control will enter here if $DIRECTORY doesn't exist.
+    echo "Compilation unsuccessful!" 1>&2
+    exit 64
+fi
+
 python -m build
 python -m twine upload dist/* --verbose
```
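Every hunk above makes the same substitution, swapping `make clean` for `rm -rf build/*` before each per-toolkit build, and the two new blocks add CUDA 12.3 targets. Purely as an illustration (not part of the repo or this commit), the repeated block reduces to a build matrix; the paths and make targets below mirror the script, with `BASE_PATH` assumed to point at the CUDA installs:

```python
# Illustrative sketch only: the repeated deploy.sh blocks expressed as a
# build matrix driven from Python. Not part of this commit.
import os
import shutil
import subprocess

BASE_PATH = os.environ.get("BASE_PATH", "/usr/local")  # assumed layout
BUILDS = [  # (CUDA install dir, make target, CUDA_VERSION), as in deploy.sh
    ("cuda-11.0", "cuda110", "110"),
    ("cuda-11.8", "cuda118", "118"),
    ("cuda-12.3", "cuda12x", "123"),
]

for cuda_dir, target, version in BUILDS:
    shutil.rmtree("build", ignore_errors=True)  # the rm -rf build/* step
    env = dict(os.environ, CUDA_HOME=f"{BASE_PATH}/{cuda_dir}")
    subprocess.run(["make", target, f"CUDA_VERSION={version}"], env=env, check=True)
    lib = f"./bitsandbytes/libbitsandbytes_cuda{version}.so"
    if not os.path.exists(lib):
        raise SystemExit(f"Compilation unsuccessful for {lib}!")
```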
setup.py

```diff
@@ -6,9 +6,7 @@ import glob
 import os
 
 from setuptools import find_packages, setup
-import bitsandbytes as bnb
 
-VERSION = bnb.__version__
 
 libs = list(glob.glob("./bitsandbytes/libbitsandbytes*.so"))
 libs = [os.path.basename(p) for p in libs]
@@ -21,7 +19,7 @@ def read(fname):
 setup(
     name=f"bitsandbytes",
-    version=VERSION,
+    version="0.42.0",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="k-bit optimizers and matrix multiplication routines.",
```
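This is the "bnb input" fix named in the commit message: setup.py previously did `import bitsandbytes as bnb` to read `bnb.__version__`, which fails whenever setup.py runs in an environment where the package's own dependencies are not yet installed (exactly the situation during `pip install`). The commit hardcodes `version="0.42.0"` instead. A common alternative, shown only as a sketch and not what this commit does, is to parse the version out of the source without importing it:

```python
# Sketch (not part of this commit): read __version__ from the source
# file with a regex so setup.py never has to import the package itself.
import re
from pathlib import Path

def read_version(init_path: str = "bitsandbytes/__init__.py") -> str:
    text = Path(init_path).read_text()
    match = re.search(r'__version__\s*=\s*"([^"]+)"', text)
    if match is None:
        raise RuntimeError(f"no __version__ string found in {init_path}")
    return match.group(1)

# setup(name="bitsandbytes", version=read_version(), ...)
```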
tests/test_cuda_setup_evaluator.py

```diff
@@ -4,6 +4,7 @@ import torch
 from pathlib import Path
 
 # hardcoded test. Not good, but a sanity check for now
+# TODO: improve this
 def test_manual_override():
     manual_cuda_path = str(Path('/mmfs1/home/dettmers/data/local/cuda-12.2'))
@@ -12,11 +13,11 @@ def test_manual_override():
     assert pytorch_version != 122
 
     os.environ['CUDA_HOME'] = '{manual_cuda_path}'
-    os.environ['CUDA_VERSION'] = '122'
-    assert str(manual_cuda_path) in os.environ['LD_LIBRARY_PATH']
+    os.environ['BNB_CUDA_VERSION'] = '122'
+    #assert str(manual_cuda_path) in os.environ['LD_LIBRARY_PATH']
     import bitsandbytes as bnb
     loaded_lib = bnb.cuda_setup.main.CUDASetup.get_instance().binary_name
-    assert loaded_lib == 'libbitsandbytes_cuda122.so'
+    #assert loaded_lib == 'libbitsandbytes_cuda122.so'
```
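The test now sets the `BNB_CUDA_VERSION` environment variable rather than `CUDA_VERSION` to force which CUDA binary the library loads. (Note that the untouched context line `os.environ['CUDA_HOME'] = '{manual_cuda_path}'` assigns a literal string, missing an f-prefix, which this commit leaves as-is.) A minimal sketch of the override the test exercises, with a placeholder install path:

```python
# Minimal sketch of the BNB_CUDA_VERSION override used by the updated
# test. The CUDA_HOME path below is a placeholder.
import os

os.environ["CUDA_HOME"] = "/usr/local/cuda-12.2"  # placeholder install path
os.environ["BNB_CUDA_VERSION"] = "122"            # force the 12.2 binary

import bitsandbytes as bnb  # the override must be set before this import

loaded = bnb.cuda_setup.main.CUDASetup.get_instance().binary_name
print(loaded)  # 'libbitsandbytes_cuda122.so' per the test's (commented) assert
```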
tests/test_functional.py

```diff
@@ -1992,8 +1992,8 @@ def test_zeropoint():
     C2 -= A.sum(1).view(-1, 1) * zp
 
     ca, cqa, cza = quant_zp(A)
-    print(ca.min(), ca.max())
-    print((ca - cza).min(), (ca - cza).max())
+    #print(ca.min(), ca.max())
+    #print((ca - cza).min(), (ca - cza).max())
 
     zp = 1
     scale = 2.0
@@ -2022,14 +2022,14 @@ def test_zeropoint():
     C7 -= zpa * zpb * A.shape[1]
     C7 /= qa * qb
 
-    print("")
+    #print("")
     # print(C0.flatten()[:10])
-    print(C1.flatten()[:10])
-    print(C2.flatten()[:10])
-    print(C3.flatten()[:10])
-    print(C5.flatten()[:10])
-    print(C6.flatten()[:10])
-    print(C7.flatten()[:10])
+    #print(C1.flatten()[:10])
+    #print(C2.flatten()[:10])
+    #print(C3.flatten()[:10])
+    #print(C5.flatten()[:10])
+    #print(C6.flatten()[:10])
+    #print(C7.flatten()[:10])
 
     err1 = torch.abs(C1 - C2).mean().item()
     err2 = torch.abs(C1 - C3).mean().item()
     err3 = torch.abs(C1 - C4).mean().item()
@@ -2355,15 +2355,15 @@ def test_normal_map_tree():
     code = F.create_normal_map()
     values = code[:8].tolist() + code[-8:].tolist()
     num_pivots = 1
-    print(values)
+    #print(values)
     while num_pivots < 16:
         idx = list(range(16 // num_pivots // 2, 16, 16 // num_pivots))
-        print(idx)
+        #print(idx)
         num_pivots *= 2
         pivots = []
         for i in idx:
             pivots.append((values[i - 1] + values[i]) / 2)
-        print(pivots)
+        #print(pivots)
 
 
 @pytest.mark.parametrize("double_quant", [True, False], ids=['DQ_True', 'DQ_False'])
@@ -2453,11 +2453,11 @@ def test_gemv_4bit(dtype, storage_type, double_quant, kind):
     #print('='*80)
     #print(f'For matmul: {A.shape}, {B.shape}, {kind}, {dtype}, {storage_type}, double_quant={double_quant}:')
-    print(C1.flatten()[-20:])
-    print(C2.flatten()[-20:])
-    print(f'inference vs training abs: {err1}')
-    print(f'inference vs training rel: {relerr1}')
-    print(f'inference vs training max: {maxerr1}')
+    #print(C1.flatten()[-20:])
+    #print(C2.flatten()[-20:])
+    #print(f'inference vs training abs: {err1}')
+    #print(f'inference vs training rel: {relerr1}')
+    #print(f'inference vs training max: {maxerr1}')
     #print(f'inference vs training vs torch err ratio abs: {absratio}')
     #print(f'inference vs training vs torch err ratio rel: {relratio}')
     #print(f'inference vs training vs torch err ratio max: {maxratio}')
```
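The changes here only comment out debug prints, but the `test_zeropoint` context lines carry the actual math: corrections like `C2 -= A.sum(1).view(-1, 1) * zp` and `C7 -= zpa * zpb * A.shape[1]` come from expanding a zero-point-shifted product. A self-contained sketch of that identity (names and values here are illustrative):

```python
# Sketch of the zero-point identity behind test_zeropoint's corrections:
#   (Aq - za) @ (Bq - zb)
#     = Aq @ Bq - zb*rowsum(Aq) - za*colsum(Bq) + za*zb*K
# so the shifted matmul is recoverable from the raw integer product.
import torch

K = 64
Aq = torch.randint(-128, 128, (4, K)).float()
Bq = torch.randint(-128, 128, (K, 4)).float()
za, zb = 3.0, 5.0  # illustrative zero points

direct = (Aq - za) @ (Bq - zb)
corrected = (Aq @ Bq
             - zb * Aq.sum(1).view(-1, 1)  # the C2-style row-sum correction
             - za * Bq.sum(0)              # column-sum correction
             + za * zb * K)                # the C7-style constant term
assert torch.allclose(direct, corrected)
```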
tests/test_modules.py

```diff
@@ -576,10 +576,10 @@ def test_kbit_backprop(module):
     assert kbit[0].weight.grad is None or kbit[0].weight.grad.sum().item() == 0
     assert kbit[0].weight.grad is None or kbit[0].bias.grad.sum().item() == 0
-    print('out', sum(errs1)/len(errs1))
-    print('grad', sum(errs2)/len(errs2))
-    print('rel out', sum(relerrs1)/len(relerrs1))
-    print('rel grad', sum(relerrs2)/len(relerrs2))
+    #print('out', sum(errs1)/len(errs1))
+    #print('grad', sum(errs2)/len(errs2))
+    #print('rel out', sum(relerrs1)/len(relerrs1))
+    #print('rel grad', sum(relerrs2)/len(relerrs2))
 
 
 def test_fp8linear():
```
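Here too only diagnostics are commented out; the remaining asserts encode "a gradient is acceptable if it is absent or identically zero". Note that the second context assert gates `bias.grad` behind a check on `weight.grad`, which looks like a copy-paste slip the commit leaves untouched. A self-contained sketch of the intended pattern (the helper is hypothetical, not part of the test suite):

```python
# Hypothetical helper illustrating the assert pattern in test_kbit_backprop:
# a parameter passes if it has no grad yet, or its grad is identically zero.
import torch

def grad_absent_or_zero(param: torch.nn.Parameter) -> bool:
    return param.grad is None or param.grad.sum().item() == 0

layer = torch.nn.Linear(4, 4)
assert grad_absent_or_zero(layer.weight)  # no backward pass yet -> grad is None
assert grad_absent_or_zero(layer.bias)
```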