Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
bitsandbytes
Commits
41faf4e3
Unverified
Commit
41faf4e3
authored
Jan 01, 2024
by
Tim Dettmers
Committed by
GitHub
Jan 01, 2024
Browse files
Merge branch 'main' into main
parents
fea5bc7b
095f7a56
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
177 additions
and
1 deletion
+177
-1
scripts/stale.py
scripts/stale.py
+60
-0
setup.py
setup.py
+1
-1
tests/__init__.py
tests/__init__.py
+0
-0
tests/test_linear4bit.py
tests/test_linear4bit.py
+116
-0
No files found.
scripts/stale.py
0 → 100644
View file @
41faf4e3
# Copyright 2023 The HuggingFace Team, the AllenNLP library authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Script to close stale issue. Taken in part from the AllenNLP repository.
https://github.com/allenai/allennlp.
"""
import
os
from
datetime
import
datetime
as
dt
from
datetime
import
timezone
from
github
import
Github
# All labels that we don't want to touch: an issue carrying any of these
# labels (compared case-insensitively) is skipped by the stale-bot — it is
# neither auto-closed nor auto-commented on.
LABELS_TO_EXEMPT = [
    "feature-request",
]
def _has_exempt_label(issue):
    """Return True if *issue* carries any label listed in LABELS_TO_EXEMPT.

    Kept as a separate helper so the (paginated) ``get_labels()`` API call is
    only made when the other, cheaper conditions have already matched.
    """
    return any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())


def main():
    """Sweep open issues: close long-stale ones, warn on soon-to-be-stale ones.

    An issue whose last comment came from the stale-bot and that has seen no
    activity for over 7 days (and is at least 30 days old) is closed.
    Otherwise, an issue inactive for over 23 days (and at least 30 days old)
    receives a stale warning comment. Issues with exempt labels are skipped.

    Requires the ``GITHUB_TOKEN`` environment variable; raises KeyError if it
    is missing.
    """
    g = Github(os.environ["GITHUB_TOKEN"])
    repo = g.get_repo("TimDettmers/bitsandbytes")
    open_issues = repo.get_issues(state="open")

    for issue in open_issues:
        # Most recent comment first; `get_comments()` already yields an
        # iterable, so no intermediate list comprehension is needed.
        comments = sorted(issue.get_comments(), key=lambda c: c.created_at, reverse=True)
        last_comment = comments[0] if comments else None

        # Take a single timestamp per issue so both age computations agree
        # (the original re-evaluated dt.now() four times per issue).
        now = dt.now(timezone.utc)
        days_since_updated = (now - issue.updated_at).days
        days_since_created = (now - issue.created_at).days

        if (
            last_comment is not None
            and last_comment.user.login == "github-actions[bot]"
            and days_since_updated > 7
            and days_since_created >= 30
            and not _has_exempt_label(issue)
        ):
            # The bot already warned and nobody replied for a week: close it.
            issue.edit(state="closed")
        elif (
            days_since_updated > 23
            and days_since_created >= 30
            and not _has_exempt_label(issue)
        ):
            # Inactive but not yet warned: post the stale notice.
            issue.create_comment(
                "This issue has been automatically marked as stale because it has not had "
                "recent activity. If you think this still needs to be addressed "
                "please comment on this thread.\n\n"
            )
# Script entry point: run the stale-issue sweep (invoked from CI / GitHub Actions).
if __name__ == "__main__":
    main()
\ No newline at end of file
setup.py
View file @
41faf4e3
...
...
@@ -18,7 +18,7 @@ def read(fname):
setup
(
name
=
f
"bitsandbytes"
,
version
=
f
"0.41.1"
,
version
=
f
"0.41.
3.post
1"
,
author
=
"Tim Dettmers"
,
author_email
=
"dettmers@cs.washington.edu"
,
description
=
"k-bit optimizers and matrix multiplication routines."
,
...
...
tests/__init__.py
0 → 100644
View file @
41faf4e3
tests/test_linear4bit.py
0 → 100644
View file @
41faf4e3
import
os
from
contextlib
import
nullcontext
from
itertools
import
product
from
tempfile
import
TemporaryDirectory
import
pytest
import
torch
import
bitsandbytes
as
bnb
def _assert_tensors_match(a, b):
    """Assert two tensors agree on device, dtype and every value."""
    assert a.device == b.device
    assert a.dtype == b.dtype
    assert torch.equal(a, b)


def _assert_quant_states_equal(q0, q1):
    """Assert the serializable attributes of two quant states are identical.

    Tensor attributes are compared element-wise; everything else by ``==``.
    """
    for attr in ('code', 'dtype', 'blocksize', 'absmax'):
        c, d = getattr(q0, attr), getattr(q1, attr)
        if isinstance(c, torch.Tensor):
            assert torch.equal(c, d)
        else:
            assert c == d, f"{c} != {d}"


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize(
    "quant_type, compress_statistics, bias",
    list(product(["nf4", "fp4"], [False, True], [False, True])),
)
def test_linear_serialization(quant_type, compress_statistics, bias):
    """Round-trip a Linear4bit layer through ``state_dict`` and verify:

    1. the restored weights, quant states (including the nested ``state2``
       used by double quantization) and bias match the original layer;
    2. both layers produce identical forward outputs;
    3. the serialized 4-bit checkpoint is meaningfully smaller on disk.
    """
    original_dtype = torch.float16
    compute_dtype = None
    device = "cuda"
    layer_shape = (300, 400)

    # original (full-precision) layer, built on CPU
    linear = torch.nn.Linear(*layer_shape, dtype=original_dtype, device="cpu")

    # Quantizing original layer
    linear_q = bnb.nn.Linear4bit(
        linear.in_features,
        linear.out_features,
        bias=bias,
        compute_dtype=compute_dtype,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
        device="meta",
    )
    new_weight = bnb.nn.Params4bit(data=linear.weight, requires_grad=False)
    linear_q.weight = new_weight
    if bias:
        linear_q.bias = torch.nn.Parameter(linear.bias)
    # moving to CUDA triggers the actual quantization
    linear_q = linear_q.to(device)

    # saving to state_dict:
    sd = linear_q.state_dict()

    # restoring from state_dict:
    bias_data2 = sd.pop("bias", None)
    weight_data2 = sd.pop("weight")
    weight2 = bnb.nn.Params4bit.from_prequantized(quantized_stats=sd, data=weight_data2)

    # creating new layer with same params:
    linear_q2 = bnb.nn.Linear4bit(
        linear.in_features,
        linear.out_features,
        bias=bias,
        compute_dtype=compute_dtype,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
        device="meta",
    )
    # loading weights from state_dict:
    linear_q2.weight = weight2
    if bias:
        linear_q2.bias = torch.nn.Parameter(bias_data2)
    linear_q2 = linear_q2.to(device)

    # MATCHING: weights, quant states (outer and, if present, nested) and bias
    a, b = linear_q.weight, linear_q2.weight
    _assert_tensors_match(a, b)

    q0 = a.quant_state
    q1 = b.quant_state
    _assert_quant_states_equal(q0, q1)

    if q0.state2 is not None:
        # double-quantization: the absmax statistics are themselves quantized
        _assert_quant_states_equal(q0.state2, q1.state2)

    if bias:
        _assert_tensors_match(linear_q.bias, linear_q2.bias)

    # Forward test
    x = torch.rand(42, layer_shape[0], device=device)
    _assert_tensors_match(linear_q(x), linear_q2(x))

    # Saved size ratio test. Target set for layer_shape == (300, 400) w/ bias
    with TemporaryDirectory() as tmpdir:
        state_path_4bit = os.path.join(tmpdir, "state_4bit.pth")
        state_path = os.path.join(tmpdir, "state.pth")
        torch.save(linear.state_dict(), state_path)
        torch.save(linear_q.state_dict(), state_path_4bit)
        size_orig, size_4 = os.path.getsize(state_path), os.path.getsize(state_path_4bit)
        size_ratio = size_4 / size_orig
        target_compression = 0.143 if original_dtype == torch.float32 else 0.29  # these numbers get lower as weight shape increases
        ratio_error_msg = f"quantized_size {size_4:,} is larger on disk than {target_compression:.2%} of original size {size_orig:,}"
        assert size_ratio < target_compression, ratio_error_msg
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment