Unverified Commit f1c75741 authored by Aarni Koskela, committed by GitHub

Tests: improve CUDA support detection (#985)

* automatically skip any test that implicitly uses CUDA on a non-CUDA box
* add a `requires_cuda` fixture
parent 53f8af8c
import pytest
import torch


def pytest_runtest_call(item):
    try:
        item.runtest()
    except AssertionError as ae:
        if str(ae) == "Torch not compiled with CUDA enabled":
            pytest.skip("Torch not compiled with CUDA enabled")
        raise


@pytest.fixture(scope="session")
def requires_cuda() -> bool:
    cuda_available = torch.cuda.is_available()
    if not cuda_available:
        pytest.skip("CUDA is required")
    return cuda_available
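A minimal usage sketch (the test below is hypothetical, not part of this commit): requesting the `requires_cuda` fixture skips a test up front on non-CUDA machines, while the `pytest_runtest_call` hook above catches the AssertionError a CPU-only torch build raises on first CUDA use and converts it into a skip.

import torch


# Hypothetical example test: skipped by the fixture when no CUDA device is available.
def test_gpu_roundtrip(requires_cuda):
    x = torch.randn(4, 4, device="cuda")
    assert x.cpu().shape == (4, 4)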
@@ -40,7 +40,6 @@ names = [
     ids=names,
 )
 def test_matmul(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
-    if not torch.cuda.is_available(): pytest.skip('No GPU found.')
     if dim2 > 0:
         dim2 = dim2 - (dim2 % 16)
         dim3 = dim3 - (dim3 % 16)
@@ -307,7 +306,6 @@ def test_matmullt(
     has_fp16_weights,
     has_bias
 ):
-    if not torch.cuda.is_available(): pytest.skip('No GPU found.')
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
     dimB = (dim3, dim4) if not transpose[1] else (dim4, dim3)
     outlier_dim = torch.randint(0, dimA[1], size=(dimA[1] // 8,), device="cuda")
@@ -461,7 +459,6 @@ quant_type = ['fp4', 'nf4']
 values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type))
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose, has_bias, compress_statistics, quant_type))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_has_bias_{}_compress_statistics_{}_quant_type_{}".format(*vals) for vals in str_values]
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type", values, ids=names)
 def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
@@ -551,7 +548,6 @@ has_fp16_weights = [True, False]
 values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose))
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}".format(*vals) for vals in str_values]
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose", values, ids=names)
 def test_matmul_fp8( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
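The inline `pytest.skip('No GPU found.')` guards removed above are no longer needed: on a torch build without CUDA support, the first CUDA call inside a test raises an AssertionError, which the new `pytest_runtest_call` hook converts into a skip. A minimal illustration (assumes a CPU-only build):

import torch

try:
    # On a CPU-only build this raises AssertionError("Torch not compiled
    # with CUDA enabled") -- the exact message the conftest hook matches
    # before calling pytest.skip().
    torch.zeros(1, device="cuda")
except AssertionError as ae:
    print(ae)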
...
@@ -5,12 +5,12 @@ from pathlib import Path
 # hardcoded test. Not good, but a sanity check for now
 # TODO: improve this
-def test_manual_override():
+def test_manual_override(requires_cuda):
     manual_cuda_path = str(Path('/mmfs1/home/dettmers/data/local/cuda-12.2'))

     pytorch_version = torch.version.cuda.replace('.', '')

-    assert pytorch_version != 122
+    assert pytorch_version != 122 # TODO: this will never be true...

     os.environ['CUDA_HOME']='{manual_cuda_path}'
     os.environ['BNB_CUDA_VERSION']='122'
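Two things worth noting about this test: `torch.version.cuda.replace('.', '')` returns a string, so comparing it against the int 122 is always true (hence the new TODO), and `'{manual_cuda_path}'` is missing its f-prefix, so CUDA_HOME is set to the literal placeholder text. The env vars themselves exercise bitsandbytes' real override mechanism; a hedged sketch of how it is meant to be used (paths and values illustrative):

import os

# Illustrative values; point CUDA_HOME at an actual local CUDA toolkit.
os.environ['CUDA_HOME'] = '/usr/local/cuda-12.2'
os.environ['BNB_CUDA_VERSION'] = '122'  # load the binary built for CUDA 12.2

import bitsandbytes  # the override is read when the library is imported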
...
@@ -617,7 +617,10 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
         return
     if dtype == torch.int32 and out_order != "col32":
         return
-    func = F.get_transform_func(dtype, orderA, orderOut, transpose)
+    try:
+        func = F.get_transform_func(dtype, orderA, orderOut, transpose)
+    except ValueError as ve:
+        pytest.skip(str(ve))  # skip if not supported
     if dims == 2:
         A = torch.randint(-128, 127, size=(dim1, dim2), device="cuda").to(dtype)
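The try/except added above illustrates a general pattern for parametrized sweeps: configurations the native library does not implement raise ValueError, and the test converts that into a skip rather than a failure. A generic sketch of the pattern (the helper name is illustrative, not from this commit):

import pytest


def call_or_skip(fn, *args, **kwargs):
    # Treat "unsupported configuration" errors as skips so that a
    # parametrized sweep only fails on genuine bugs.
    try:
        return fn(*args, **kwargs)
    except ValueError as ve:
        pytest.skip(str(ve))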
@@ -2278,7 +2281,6 @@ def test_fp4_quant(dtype):
     assert relerr.item() < 0.28


-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 def test_4bit_compressed_stats(quant_type):
     for blocksize in [128, 64]:
@@ -2317,7 +2319,6 @@ def test_4bit_compressed_stats(quant_type):

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 #@pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 @pytest.mark.parametrize("quant_type", ['nf4'])
 def test_bench_4bit_dequant(quant_type):
...
@@ -79,7 +79,7 @@ def model_and_tokenizer(request):
 @pytest.mark.parametrize("DQ", [True, False], ids=['DQ_True', 'DQ_False'])
 @pytest.mark.parametrize("inference_kernel", [True, False], ids=['inference_kernel_True', 'inference_kernel_False'])
 #@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=['fp16', 'bf16', 'fp32'])
-def test_pi(model_and_tokenizer, inference_kernel, DQ):
+def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ):
     print('')
     dtype = torch.float16
...
@@ -15,7 +15,6 @@ storage = {
     'float32': torch.float32
 }

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize(
     "quant_type, compress_statistics, bias, quant_storage",
     list(product(["nf4", "fp4"], [False, True], [False, True], ['uint8', 'float16', 'bfloat16', 'float32'])),
...
@@ -33,7 +33,6 @@ def test_layout_exact_match():
     assert torch.all(torch.eq(restored_x, x))

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 def test_linear_no_igemmlt():
     linear = torch.nn.Linear(1024, 3072)
     x = torch.randn(3, 1024, dtype=torch.half)
@@ -68,7 +67,6 @@ def test_linear_no_igemmlt():
     assert linear_custom.state.CxB is None

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt",
                          list(product([False, True], [False, True], [False, True], [False, True])))
 def test_linear_serialization(has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt):
...
@@ -520,7 +520,6 @@ modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float
 modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16))
 modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16))
 names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C', 'NF4+fp32', 'NF4+fp16', 'NF4+bf16']

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("module", modules, ids=names)
 def test_kbit_backprop(module):
     b = 17
...