"git@developer.sourcefind.cn:OpenDAS/ktransformers.git" did not exist on "be4b27e841d2084a4db05d0c7a9b27a696f83073"
Unverified commit 42bc7291, authored by Matthew Douglas and committed by GitHub
Browse files

Improvements to test suite (#1636)

* Improvements for testing suite

* Add workflow for macOS arm64 CPU tests
parent d870f9c5
...@@ -15,7 +15,7 @@ jobs: ...@@ -15,7 +15,7 @@ jobs:
build-cpu: build-cpu:
strategy: strategy:
matrix: matrix:
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025] os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
include: include:
- os: ubuntu-22.04 - os: ubuntu-22.04
arch: x86_64 arch: x86_64
...@@ -23,6 +23,8 @@ jobs: ...@@ -23,6 +23,8 @@ jobs:
arch: aarch64 arch: aarch64
- os: windows-2025 - os: windows-2025
arch: x86_64 arch: x86_64
- os: macos-15
arch: arm64
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
...@@ -97,7 +99,7 @@ jobs: ...@@ -97,7 +99,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025] os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
torch_version: ["2.7.0"] torch_version: ["2.7.0"]
include: include:
- os: ubuntu-22.04 - os: ubuntu-22.04
...@@ -106,6 +108,8 @@ jobs: ...@@ -106,6 +108,8 @@ jobs:
arch: aarch64 arch: aarch64
- os: windows-2025 - os: windows-2025
arch: x86_64 arch: x86_64
- os: macos-15
arch: arm64
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
env: env:
BNB_TEST_DEVICE: cpu BNB_TEST_DEVICE: cpu
......
...@@ -94,7 +94,11 @@ class Test8BitBlockwiseQuantizeFunctional: ...@@ -94,7 +94,11 @@ class Test8BitBlockwiseQuantizeFunctional:
@pytest.mark.parametrize("blocksize", [4096, 2048, 1024, 512, 256, 128, 64]) @pytest.mark.parametrize("blocksize", [4096, 2048, 1024, 512, 256, 128, 64])
@pytest.mark.parametrize("signed", TRUE_FALSE, ids=id_formatter("signed")) @pytest.mark.parametrize("signed", TRUE_FALSE, ids=id_formatter("signed"))
def test_dynamic_blockwise_quantization(self, device, dtype, nested, blocksize, signed): def test_dynamic_blockwise_quantization(self, device, dtype, nested, blocksize, signed):
iters = 100
if device == "cpu": if device == "cpu":
iters = 10
# This test is slow on CPU, so avoid atypical use cases. # This test is slow on CPU, so avoid atypical use cases.
if nested: if nested:
pytest.skip("Not a typical use case.") pytest.skip("Not a typical use case.")
...@@ -106,7 +110,7 @@ class Test8BitBlockwiseQuantizeFunctional: ...@@ -106,7 +110,7 @@ class Test8BitBlockwiseQuantizeFunctional:
diffs = [] diffs = []
reldiffs = [] reldiffs = []
for i in range(100): for i in range(iters):
A1 = torch.randn(1024, 1024, device=device, dtype=dtype) A1 = torch.randn(1024, 1024, device=device, dtype=dtype)
C, S = F.quantize_blockwise(A1, blocksize=blocksize, nested=nested) C, S = F.quantize_blockwise(A1, blocksize=blocksize, nested=nested)
A2 = F.dequantize_blockwise(C, S) A2 = F.dequantize_blockwise(C, S)
...@@ -116,15 +120,13 @@ class Test8BitBlockwiseQuantizeFunctional: ...@@ -116,15 +120,13 @@ class Test8BitBlockwiseQuantizeFunctional:
reldiffs.append(reldiff.mean().item()) reldiffs.append(reldiff.mean().item())
abserr = sum(diffs) / len(diffs) abserr = sum(diffs) / len(diffs)
relerr = sum(reldiffs) / len(reldiffs) relerr = sum(reldiffs) / len(reldiffs)
# print('nested=', nested, 'randn', blocksize, 'dtype', dtype, sum(diffs)/len(diffs))
# print('nested=', nested, 'randn', blocksize, 'dtype', dtype, sum(reldiffs)/len(reldiffs))
assert abserr < 0.011 assert abserr < 0.011
assert relerr < 0.018 assert relerr < 0.018
assert A2.dtype == dtype assert A2.dtype == dtype
diffs = [] diffs = []
code = F.create_dynamic_map(signed=signed) code = F.create_dynamic_map(signed=signed)
for i in range(100): for i in range(iters):
A1 = torch.rand(1024, 1024, device=device, dtype=dtype) A1 = torch.rand(1024, 1024, device=device, dtype=dtype)
C, S = F.quantize_blockwise(A1, blocksize=blocksize, nested=nested, code=code) C, S = F.quantize_blockwise(A1, blocksize=blocksize, nested=nested, code=code)
A2 = F.dequantize_blockwise(C, S) A2 = F.dequantize_blockwise(C, S)
...@@ -142,29 +144,29 @@ class Test8BitBlockwiseQuantizeFunctional: ...@@ -142,29 +144,29 @@ class Test8BitBlockwiseQuantizeFunctional:
assert abserr < 0.00175 assert abserr < 0.00175
assert relerr < 0.012 assert relerr < 0.012
assert A2.dtype == dtype assert A2.dtype == dtype
# print('signed=', signed, 'nested=', nested, 'rand', blocksize, sum(diffs)/len(diffs))
# print('signed=', signed, 'nested=', nested, 'rand', blocksize, sum(reldiffs)/len(reldiffs))
def test_blockwise_cpu_large(self): @pytest.mark.skipif("cpu" not in get_available_devices(), reason="CPU is required")
@pytest.mark.parametrize("hidden", [128])
@pytest.mark.parametrize("blocksize", [4096, 16384])
def test_blockwise_cpu_large(self, hidden, blocksize):
diffs = [] diffs = []
reldiffs = [] reldiffs = []
batch = 128 batch = 128
seq = 128 seq = 128
for hidden in [128]: # , 14336]:
for blocksize in [4096, 16384]: for i in range(2):
for i in range(2): A1 = torch.randn(batch, seq, hidden, device="cpu")
A1 = torch.randn(batch, seq, hidden, device="cpu") t0 = time.time()
t0 = time.time() C, S = F.quantize_blockwise(A1, blocksize=blocksize)
C, S = F.quantize_blockwise(A1, blocksize=blocksize) A2 = F.dequantize_blockwise(C, S, blocksize=blocksize)
A2 = F.dequantize_blockwise(C, S, blocksize=blocksize) print(time.time() - t0)
print(time.time() - t0) diff = torch.abs(A1 - A2)
diff = torch.abs(A1 - A2) reldiff = diff / torch.abs(A1 + 1e-8)
reldiff = diff / torch.abs(A1 + 1e-8) diffs.append(diff.mean().item())
diffs.append(diff.mean().item()) reldiffs.append(reldiff.mean().item())
reldiffs.append(reldiff.mean().item()) assert diffs[-1] < 0.011
assert diffs[-1] < 0.011 # print(sum(diffs)/len(diffs))
# print(sum(diffs)/len(diffs)) # print(sum(reldiffs)/len(reldiffs))
# print(sum(reldiffs)/len(reldiffs))
@pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("device", get_available_devices())
@pytest.mark.parametrize("bits", range(2, 9), ids=id_formatter("bits")) @pytest.mark.parametrize("bits", range(2, 9), ids=id_formatter("bits"))
......
...@@ -97,8 +97,12 @@ class TestInt8BlockwiseQuantOps: ...@@ -97,8 +97,12 @@ class TestInt8BlockwiseQuantOps:
@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=id_formatter("dtype")) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=id_formatter("dtype"))
@pytest.mark.parametrize("blocksize", [64, 128, 256, 512]) @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
def test_quantize_blockwise(self, device, dtype, blocksize): def test_quantize_blockwise(self, device, dtype, blocksize):
if device == "cpu" and dtype != torch.float32: if device == "cpu":
pytest.skip("CPU implementation is only available for float32") if dtype != torch.float32:
pytest.skip("CPU implementation is only available for float32")
if blocksize != 256:
pytest.skip("CPU implementation is slow; only test blocksize=256")
code = bitsandbytes.functional.create_dynamic_map().to(device) code = bitsandbytes.functional.create_dynamic_map().to(device)
A = torch.randn(1024, 1024, dtype=dtype, device=device) A = torch.randn(1024, 1024, dtype=dtype, device=device)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment