Unverified commit c85733cf authored by Tim Dettmers, committed by GitHub

Merge branch 'main' into patch-2

parents d76b6ca9 9c63202a
[pytest]
addopts = -rP
; --cov=bitsandbytes
; # contexts: record which test ran which line; can be seen in html coverage report
; --cov-context=test
; --cov-report html
log_cli = True
log_cli_level = INFO
log_file = logs/pytest.log
\ No newline at end of file
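
For context, log_cli = True with log_cli_level = INFO makes pytest echo log records live during the run, and log_file collects them under logs/pytest.log. A minimal sketch of a test whose output these settings surface; the test and logger names are illustrative:

import logging

logger = logging.getLogger(__name__)


def test_logging_is_surfaced():
    # Shown live in the terminal at INFO level; with a matching
    # log_file_level it is also recorded in logs/pytest.log.
    logger.info("selected blockwise quantization kernel")
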
# Copyright 2023 The HuggingFace Team, the AllenNLP library authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Script to close stale issue. Taken in part from the AllenNLP repository.
https://github.com/allenai/allennlp.
"""
import os
from datetime import datetime as dt
from datetime import timezone

from github import Github

# All labels that we don't want to touch
LABELS_TO_EXEMPT = [
    "feature-request",
]


def main():
    g = Github(os.environ["GITHUB_TOKEN"])
    repo = g.get_repo("TimDettmers/bitsandbytes")
    open_issues = repo.get_issues(state="open")

    for issue in open_issues:
        comments = sorted(issue.get_comments(), key=lambda c: c.created_at, reverse=True)
        last_comment = comments[0] if len(comments) > 0 else None
        if (
            last_comment is not None
            and last_comment.user.login == "github-actions[bot]"
            and (dt.now(timezone.utc) - issue.updated_at).days > 7
            and (dt.now(timezone.utc) - issue.created_at).days >= 30
            and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
        ):
            issue.edit(state="closed")
        elif (
            (dt.now(timezone.utc) - issue.updated_at).days > 23
            and (dt.now(timezone.utc) - issue.created_at).days >= 30
            and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
        ):
            issue.create_comment(
                "This issue has been automatically marked as stale because it has not had "
                "recent activity. If you think this still needs to be addressed "
                "please comment on this thread.\n\n"
            )


if __name__ == "__main__":
    main()
\ No newline at end of file
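
Taken together, the two branches implement a two-phase policy: an issue at least 30 days old gets the stale comment after 23 days without activity, and is closed once the bot's comment has itself sat unanswered for more than 7 days. The sketch below distills that predicate; the function and its name are illustrative only, and the label-exemption check is left out for brevity:

from datetime import datetime, timezone


def stale_action(created_at, updated_at, last_commenter):
    """Return 'close', 'mark_stale', or None, mirroring the thresholds above."""
    now = datetime.now(timezone.utc)
    age_days = (now - created_at).days
    idle_days = (now - updated_at).days
    if last_commenter == "github-actions[bot]" and idle_days > 7 and age_days >= 30:
        return "close"
    if idle_days > 23 and age_days >= 30:
        return "mark_stale"
    return None
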
@@ -18,7 +18,7 @@ def read(fname):
 setup(
     name=f"bitsandbytes",
-    version=f"0.40.2",
+    version=f"0.41.3.post1",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="k-bit optimizers and matrix multiplication routines.",
@@ -129,6 +129,7 @@ def test_quantile_quantization():
     assert diff < 0.001

+
 def test_dynamic_quantization():
     diffs = []
     reldiffs = []
@@ -141,8 +142,8 @@ def test_dynamic_quantization():
         diffs.append(diff.mean().item())
         reldiffs.append(reldiff.mean().item())
         assert diff.mean().item() < 0.0135
-    # print(sum(diffs)/len(diffs))
-    # print(sum(reldiffs)/len(reldiffs))
+    print(sum(diffs)/len(diffs))
+    print(sum(reldiffs)/len(reldiffs))

     for i in range(100):
         A1 = torch.rand(1024, 1024, device="cuda")
@@ -157,7 +158,8 @@ def test_dynamic_quantization():
 @pytest.mark.parametrize("dtype", [torch.float32, torch.float16, torch.bfloat16], ids=["fp32", "fp16", "bf16"])
 @pytest.mark.parametrize("nested", [False, True], ids=["False", "True"])
 @pytest.mark.parametrize("blocksize", [4096, 2048, 1024, 512, 256, 128, 64])
-def test_dynamic_blockwise_quantization(dtype, nested, blocksize):
+@pytest.mark.parametrize("signed", [True, False], ids=['signed_True', 'signed_False'])
+def test_dynamic_blockwise_quantization(dtype, nested, blocksize, signed):
     #print('')
     diffs = []
     reldiffs = []
@@ -178,9 +180,10 @@ def test_dynamic_blockwise_quantization(dtype, nested, blocksize):
     assert A2.dtype == dtype

     diffs = []
+    code = F.create_dynamic_map(signed=signed)
     for i in range(100):
         A1 = torch.rand(1024, 1024, device="cuda", dtype=dtype)
-        C, S = F.quantize_blockwise(A1, blocksize=blocksize, nested=nested)
+        C, S = F.quantize_blockwise(A1, blocksize=blocksize, nested=nested, code=code)
         A2 = F.dequantize_blockwise(C, S)
         diff = torch.abs(A1 - A2).float()
         reldiff = diff / torch.abs(A1.float() + 1e-8)
@@ -189,11 +192,15 @@ def test_dynamic_blockwise_quantization(dtype, nested, blocksize):
     #torch.testing.assert_close(A1, A2, atol=1e-2, rtol=0)
     abserr = sum(diffs)/len(diffs)
     relerr = sum(reldiffs)/len(reldiffs)
+    if signed:
+        assert abserr < 0.0035
+        assert relerr < 0.015
+    else:
+        assert abserr < 0.00175
+        assert relerr < 0.012
     assert A2.dtype == dtype
-    #print('nested=', nested, 'rand', blocksize, sum(diffs)/len(diffs))
-    #print('nested=', nested, 'rand', blocksize, sum(reldiffs)/len(reldiffs))
+    #print('signed=', signed, 'nested=', nested, 'rand', blocksize, sum(diffs)/len(diffs))
+    #print('signed=', signed, 'nested=', nested, 'rand', blocksize, sum(reldiffs)/len(reldiffs))
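
The signed parameter added above feeds F.create_dynamic_map, which builds the 256-entry dynamic quantization code. With signed=False the sign bit is reallocated to precision, which is why the unsigned branch can assert half the absolute error of the signed one on the non-negative torch.rand inputs. A hedged illustration follows; the entry count and value ranges are assumptions about the library's output, not something shown in this diff:

import bitsandbytes.functional as F

# Both maps are assumed to have 256 entries; the unsigned map spends the
# sign bit on extra precision and contains no negative code values.
code_signed = F.create_dynamic_map(signed=True)
code_unsigned = F.create_dynamic_map(signed=False)
assert code_signed.numel() == code_unsigned.numel() == 256
assert code_signed.min().item() < 0
assert code_unsigned.min().item() >= 0
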
@@ -2366,7 +2373,7 @@ def test_normal_map_tree():
 def test_gemv_4bit(dtype, storage_type, double_quant, kind):
     for dim in [128, 256, 512, 1024]:
     #for dim in [4*1024]:
-    #for dim in [1*128]:
+    #for dim in [1*16]:
         errs1 = []
         errs2 = []
         errs3 = []
@@ -2446,11 +2453,11 @@ def test_gemv_4bit(dtype, storage_type, double_quant, kind):
         #
         #print('='*80)
         #print(f'For matmul: {A.shape}, {B.shape}, {kind}, {dtype}, {storage_type}, double_quant={double_quant}:')
-        #print(C1.flatten()[-20:])
-        #print(C2.flatten()[-20:])
-        #print(f'inference vs training abs: {err1}')
-        #print(f'inference vs training rel: {relerr1}')
-        #print(f'inference vs training max: {maxerr1}')
+        print(C1.flatten()[-20:])
+        print(C2.flatten()[-20:])
+        print(f'inference vs training abs: {err1}')
+        print(f'inference vs training rel: {relerr1}')
+        print(f'inference vs training max: {maxerr1}')
         #print(f'inference vs training vs torch err ratio abs: {absratio}')
         #print(f'inference vs training vs torch err ratio rel: {relratio}')
         #print(f'inference vs training vs torch err ratio max: {maxratio}')
@@ -2478,7 +2485,7 @@ def test_gemv_4bit(dtype, storage_type, double_quant, kind):
             assert maxratio < 1.005 and maxratio > 0.995
         elif dtype == torch.bfloat16:
             if dim <= 512:
-                assert err1 < 5e-4
+                assert err1 < 6e-4
                 assert relerr1 < 0.007
                 assert maxerr1 < 0.015
             else:
import os
from contextlib import nullcontext
from itertools import product
from tempfile import TemporaryDirectory

import pytest
import torch

import bitsandbytes as bnb


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize(
    "quant_type, compress_statistics, bias",
    list(product(["nf4", "fp4"], [False, True], [False, True])),
)
def test_linear_serialization(quant_type, compress_statistics, bias):
    original_dtype = torch.float16
    compute_dtype = None
    device = "cuda"
    layer_shape = (300, 400)

    linear = torch.nn.Linear(*layer_shape, dtype=original_dtype, device="cpu")  # original layer

    # Quantizing original layer
    linear_q = bnb.nn.Linear4bit(
        linear.in_features,
        linear.out_features,
        bias=bias,
        compute_dtype=compute_dtype,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
        device="meta",
    )
    new_weight = bnb.nn.Params4bit(data=linear.weight, requires_grad=False)
    linear_q.weight = new_weight
    if bias:
        linear_q.bias = torch.nn.Parameter(linear.bias)
    linear_q = linear_q.to(device)

    # saving to state_dict:
    sd = linear_q.state_dict()

    # restoring from state_dict:
    bias_data2 = sd.pop("bias", None)
    weight_data2 = sd.pop("weight")
    weight2 = bnb.nn.Params4bit.from_prequantized(quantized_stats=sd, data=weight_data2)

    # creating new layer with same params:
    linear_q2 = bnb.nn.Linear4bit(
        linear.in_features,
        linear.out_features,
        bias=bias,
        compute_dtype=compute_dtype,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
        device="meta",
    )
    # loading weights from state_dict:
    linear_q2.weight = weight2
    if bias:
        linear_q2.bias = torch.nn.Parameter(bias_data2)
    linear_q2 = linear_q2.to(device)

    # MATCHING
    a, b = linear_q.weight, linear_q2.weight
    assert a.device == b.device
    assert a.dtype == b.dtype
    assert torch.equal(a, b)

    q0 = a.quant_state
    q1 = b.quant_state
    for attr in ('code', 'dtype', 'blocksize', 'absmax'):
        c, d = getattr(q0, attr), getattr(q1, attr)
        if isinstance(c, torch.Tensor):
            assert torch.equal(c, d)
        else:
            assert c == d, f"{c} != {d}"

    if q0.state2 is not None:
        for attr in ('code', 'dtype', 'blocksize', 'absmax'):
            c, d = getattr(q0.state2, attr), getattr(q1.state2, attr)
            if isinstance(c, torch.Tensor):
                assert torch.equal(c, d)
            else:
                assert c == d, f"{c} != {d}"

    if bias:
        a, b = linear_q.bias, linear_q2.bias
        assert a.device == b.device
        assert a.dtype == b.dtype
        assert torch.equal(a, b)

    # Forward test
    x = torch.rand(42, layer_shape[0], device=device)
    a = linear_q(x)
    b = linear_q2(x)
    assert a.device == b.device
    assert a.dtype == b.dtype
    assert torch.equal(a, b)

    # Saved size ratio test. Target set for layer_shape == (300, 400) w/ bias
    with TemporaryDirectory() as tmpdir:
        state_path_4bit = os.path.join(tmpdir, "state_4bit.pth")
        state_path = os.path.join(tmpdir, "state.pth")
        torch.save(linear.state_dict(), state_path)
        torch.save(linear_q.state_dict(), state_path_4bit)

        size_orig, size_4 = os.path.getsize(state_path), os.path.getsize(state_path_4bit)
        size_ratio = size_4 / size_orig
        target_compression = 0.143 if original_dtype == torch.float32 else 0.29  # these numbers get lower as weight shape increases
        ratio_error_msg = f"quantized_size {size_4:,} is larger on disk than {target_compression:.2%} of original size {size_orig:,}"
        assert size_ratio < target_compression, ratio_error_msg
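
Outside the test harness, the round trip exercised above reduces to a few lines. A minimal sketch, assuming a CUDA device is available; 'layer.pth' is a hypothetical path:

import torch
import bitsandbytes as bnb

# Quantize an fp16 Linear into a Linear4bit and save it.
fp16_linear = torch.nn.Linear(300, 400, dtype=torch.float16)
q = bnb.nn.Linear4bit(300, 400, quant_type="nf4", device="meta")
q.weight = bnb.nn.Params4bit(data=fp16_linear.weight, requires_grad=False)
q.bias = torch.nn.Parameter(fp16_linear.bias)
q = q.to("cuda")  # quantization happens on the move to the GPU
torch.save(q.state_dict(), "layer.pth")

# Restore: the quantization statistics travel inside the state_dict.
sd = torch.load("layer.pth")
bias_data = sd.pop("bias", None)
weight_data = sd.pop("weight")
q2 = bnb.nn.Linear4bit(300, 400, quant_type="nf4", device="meta")
q2.weight = bnb.nn.Params4bit.from_prequantized(quantized_stats=sd, data=weight_data)
if bias_data is not None:
    q2.bias = torch.nn.Parameter(bias_data)
q2 = q2.to("cuda")
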
@@ -516,7 +516,10 @@ modules.append(bnb.nn.LinearFP4)
 modules.append(bnb.nn.LinearNF4)
 modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compress_statistics=True))
 modules.append(lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compress_statistics=True))
-names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C']
+modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float32))
+modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16))
+modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16))
+names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C', 'NF4+fp32', 'NF4+fp16', 'NF4+bf16']
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("module", modules, ids=names)
 def test_kbit_backprop(module):
@@ -563,10 +566,10 @@ def test_kbit_backprop(module):
         relerrs2.append(relerr2.mean().item())

         if isinstance(module, bnb.nn.Linear8bitLt):
-            torch.testing.assert_close(grad1, grad2, atol=0.008, rtol=0.05)
+            assert_all_approx_close(grad1, grad2, atol=0.008, rtol=0.05, count=1)
             torch.testing.assert_close(bgrad1, bgrad2, atol=0.008, rtol=0.05)
         else:
-            torch.testing.assert_close(grad1, grad2, atol=0.015, rtol=0.05)
+            assert_all_approx_close(grad1, grad2, atol=0.015, rtol=0.05, count=1)
             torch.testing.assert_close(bgrad1, bgrad2, atol=0.02, rtol=0.05)
         ref.zero_grad()
         kbit.zero_grad()
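
The swap from torch.testing.assert_close to assert_all_approx_close with count=1 relaxes the check to tolerate a single outlier element. A hedged sketch of those semantics; the test suite defines its own helper, and this illustration is not its actual implementation:

import torch


def assert_all_approx_close(a, b, rtol, atol, count):
    # Pass if at most `count` elements fall outside the tolerance band.
    close = torch.isclose(a, b, rtol=rtol, atol=atol)
    num_off = (~close).sum().item()
    if num_off > count:
        raise AssertionError(f"{num_off} values not close (allowed: {count})")
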
@@ -608,9 +611,33 @@ def test_fp8linear():
     assert graderr < 0.00002
     assert bgraderr < 0.00002
+
+
+def test_4bit_warnings():
+    dim1 = 64
+
+    with pytest.warns(UserWarning, match=r'inference or training'):
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, compute_dtype=torch.float32) for i in range(10)])
+        net = net.cuda()
+        inp = torch.rand(10, dim1).cuda().half()
+        net(inp)
+    with pytest.warns(UserWarning, match=r'inference.'):
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, compute_dtype=torch.float32) for i in range(10)])
+        net = net.cuda()
+        inp = torch.rand(1, dim1).cuda().half()
+        net(inp)
+
+    with pytest.warns(UserWarning) as record:
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, compute_dtype=torch.float32) for i in range(10)])
+        net = net.cuda()
+        inp = torch.rand(10, dim1).cuda().half()
+        net(inp)
+
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, compute_dtype=torch.float32) for i in range(10)])
+        net = net.cuda()
+        inp = torch.rand(1, dim1).cuda().half()
+        net(inp)
+    assert len(record) == 2