Unverified commit cdcae8d3, authored by Matthew Douglas, committed by GitHub

CI runner updates (#1643)

* Test g5g runner

* Switch L4 to L40S runner; swap GitHub Linux T4 runner for AWS g4dn

* Run tests on last 2 pytorch stable releases

* Run tests on last 2 pytorch stable releases
parent 513e69be
@@ -49,7 +49,7 @@ jobs:
   build-cuda:
     strategy:
       matrix:
-        cuda_version: ["11.8.0", "12.8.1"]
+        cuda_version: ["11.8.0", "12.6.3", "12.8.1"]
         os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025]
         include:
           - os: ubuntu-22.04
@@ -100,7 +100,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
-        torch_version: ["2.7.0"]
+        torch_version: ["2.6.0", "2.7.0"]
         include:
           - os: ubuntu-22.04
             arch: x86_64
@@ -138,9 +138,35 @@ jobs:
       - name: Show installed packages
         run: pip list
+      - name: Show environment information
+        run: python -m torch.utils.collect_env
       - name: Run tests
         run: pytest --durations=100

+  # cuda-aarch64-tests:
+  #   if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
+  #   needs: build-cuda
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       os: [ubuntu-22.04-arm]
+  #       arch: [aarch64]
+  #       torch_version: ["2.7.0"]
+  #       cuda_version: ["11.8.0", "12.8.1"]
+  #   runs-on: bandb-aws-g5g-4xlarge-plus-use1-public-80
+  #   env:
+  #     BNB_TEST_DEVICE: cuda
+  #   steps:
+  #     - name: Show GPU Information
+  #       run: nvidia-smi
+  #     - name: Show pip packages
+  #       run: pip list
   cuda-tests:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cuda
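The commented-out cuda-aarch64-tests job above stages a future Arm64 GPU leg on the AWS g5g runner (Graviton2 hosts with NVIDIA T4G GPUs). Like the active jobs, it exports BNB_TEST_DEVICE. A minimal sketch of how a test suite can honor that variable (the variable name comes from this workflow; the selection logic below is an assumption, not the repository's actual conftest):

import os

import torch

# Assumption: the suite derives its target device from BNB_TEST_DEVICE,
# defaulting to CPU when the variable is unset.
device = os.environ.get("BNB_TEST_DEVICE", "cpu")
if device == "cuda":
    assert torch.cuda.is_available(), "BNB_TEST_DEVICE=cuda but no GPU is visible"

# Smoke check that tensors land on the requested device.
x = torch.ones(4, device=device)
print(x.device)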
@@ -149,25 +175,28 @@ jobs:
       matrix:
         os: [ubuntu-22.04, windows-2025]
         arch: [x86_64]
-        gpu: [T4, L4]
-        cuda_version: ["11.8.0", "12.8.1"]
+        gpu: [T4, L40S]
+        cuda_version: ["11.8.0", "12.6.3", "12.8.1"]
         include:
           - cuda_version: "11.8.0"
             torch_version: "2.4.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
+          - cuda_version: "12.6.3"
+            torch_version: "2.6.0"
+            pypi_index: "https://download.pytorch.org/whl/cu126"
           - cuda_version: "12.8.1"
             torch_version: "2.7.0"
             pypi_index: "https://download.pytorch.org/whl/cu128"
-          # L4 runners
+          # L40S runners
           - os: ubuntu-22.04
-            gpu: L4
-            runner: bandb-aws-g6-4xlarge-plus-use1-public-80
+            gpu: L40S
+            runner: bandb-aws-g6e-4xlarge-plus-use1-public-80
           # T4 runners
           - os: ubuntu-22.04
             gpu: T4
-            runner: CUDA-Linux-x64
+            runner: bandb-aws-g4dn-4xlarge-plus-use1-public-80
           - os: windows-2025
             gpu: T4
             runner: CUDA-Windows-x64
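Each include entry above pairs a CUDA toolkit with the matching PyTorch wheel index, so the torch build under test links against the same CUDA minor series. A hedged sketch of the install such an entry typically drives (the actual install step sits outside the hunks shown here, so the exact command is an assumption):

import subprocess
import sys

# Values as they would arrive from the matrix (matrix.torch_version and
# matrix.pypi_index in the workflow above).
torch_version = "2.6.0"
pypi_index = "https://download.pytorch.org/whl/cu126"

# Equivalent to:
#   pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cu126
subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    f"torch=={torch_version}",
    "--index-url", pypi_index,
])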
@@ -176,10 +205,12 @@ jobs:
           # and cannot support CUDA 12+. Skip for now.
           - os: windows-2025
             cuda_version: "12.8.1"
+          - os: windows-2025
+            cuda_version: "12.6.3"
-          # No Windows L4 runners.
+          # No Windows L40S runners.
           - os: windows-2025
-            gpu: L4
+            gpu: L40S
     runs-on: ${{ matrix.runner }}
     env:
       BNB_TEST_DEVICE: cuda
@@ -210,5 +241,8 @@ jobs:
       - name: Show installed packages
         run: pip list
+      - name: Show environment information
+        run: python -m torch.utils.collect_env
       - name: Run tests
         run: pytest --durations=100
@@ -929,39 +929,6 @@ class TestSpMMFunctional:
         #     torch.cuda.synchronize()
         # print(time.time() - t0)

-    @pytest.mark.parametrize("dim1", [256, 1024], ids=id_formatter("dim1"))
-    @pytest.mark.parametrize("dim2", [256, 1024], ids=id_formatter("dim2"))
-    @pytest.mark.skip("No longer supported")
-    def test_integrated_sparse_decomp(self, dim1, dim2):
-        threshold = 3.0
-        for _ in range(k):
-            A = torch.randn(dim1, dim2).cuda().half()
-            w1 = torch.randn(dim1, dim2).cuda().half()
-            out1 = torch.matmul(A, w1.t())
-
-            Cw1, statsw1, _ = F.int8_vectorwise_quant(w1)
-            CA, statsA, _ = F.int8_vectorwise_quant(A)
-
-            out1_32 = F.int8_linear_matmul(CA, Cw1)
-            out2 = F.int8_mm_dequant(out1_32, statsA, statsw1)
-
-            # CA, statsA, outlier_cols = F.int8_vectorwise_quant(A, threshold=threshold)
-            CA, _, statsA, _, coo_tensor = F.double_quant(A, threshold=threshold)
-            out1_32 = F.int8_linear_matmul(CA, Cw1)
-            out3 = F.int8_mm_dequant(out1_32, statsA, statsw1)
-
-            assert coo_tensor is not None
-            out4 = F.spmm_coo(coo_tensor, w1.t())
-            # idx = torch.unique(coo_tensor._indices()[1]).long()
-            # out4 = torch.matmul(A, w1.t())
-            out5 = out3 + out4
-
-            err1 = torch.abs(out1 - out2).mean().item()
-            err2 = torch.abs(out1 - out5).mean().item()
-            assert err2 < err1
-
     @pytest.mark.parametrize("dim1", [1 * 2048])
     @pytest.mark.parametrize("dim2", [2048])
     @pytest.mark.parametrize("dtype", [torch.int8])
...
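The deleted test exercised the int8 sparse-decomposition path through the retired F.double_quant API. For reference, a minimal sketch of the plain int8 matmul path that remains, using only the functional calls already visible in the deleted code (shapes are illustrative; exact signatures may vary between releases):

import torch

import bitsandbytes.functional as F

# Row-wise int8 quantization of activations and weights, as in the
# deleted test above.
A = torch.randn(256, 256, device="cuda", dtype=torch.float16)
w = torch.randn(256, 256, device="cuda", dtype=torch.float16)

CA, stats_a, _ = F.int8_vectorwise_quant(A)
Cw, stats_w, _ = F.int8_vectorwise_quant(w)

# int8 GEMM with int32 accumulation, then dequantize with both scale vectors.
out_i32 = F.int8_linear_matmul(CA, Cw)
out = F.int8_mm_dequant(out_i32, stats_a, stats_w)

# Compare against the fp16 reference, as the deleted test did.
ref = torch.matmul(A, w.t())
print((out - ref).abs().mean().item())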
@@ -130,7 +130,7 @@ def test_linear8bitlt_no_fp16_weights(device, threshold):
     assert l1.weight.dtype == torch.int8

     l1.eval()
-    for i in range(100):
+    for i in range(4):
         b1 = torch.randn(16, 8, 32, device=device, dtype=torch.float16)
         o1 = l1(b1)
         assert o1.dtype == torch.float16
@@ -139,7 +139,7 @@ def test_linear8bitlt_no_fp16_weights(device, threshold):
     assert mlp.fc1.weight.dtype == torch.int8
     assert mlp.fc2.weight.dtype == torch.int8

-    for i in range(100):
+    for i in range(4):
         b1 = torch.randn(16, 8, 32, device=device, dtype=torch.float16)
         o1 = mlp(b1)
         assert o1.dtype == torch.float16
@@ -152,7 +152,7 @@ def test_linear8bitlt_no_fp16_weights(device, threshold):
     assert mlp.fc1.weight.dtype == torch.int8
     assert mlp.fc2.weight.dtype == torch.int8

-    for i in range(100):
+    for i in range(4):
         b1 = torch.randn(16, 8, 32, device=device, dtype=torch.float16)
         o1 = mlp(b1)
         assert o1.dtype == torch.float16
@@ -163,7 +163,7 @@ def test_linear8bitlt_no_fp16_weights(device, threshold):
     mlp = MLP8bit(32, 64, threshold=threshold, has_fp16_weights=False).half().to(device)

-    for i in range(100):
+    for i in range(4):
         b1 = torch.randn(16, 8, 32, device=device, dtype=torch.float16)
         o1 = mlp(b1)
         assert o1.dtype == torch.float16
@@ -185,7 +185,7 @@ def test_linear8bitlt_no_fp16_weights(device, threshold):
         .to(device)
     )

-    for i in range(100):
+    for i in range(4):
         b1 = torch.randn(16, 8, 32, device=device, dtype=torch.float16)
         o1 = mlp(b1)
         assert o1.dtype == torch.float16
@@ -207,7 +207,7 @@ def test_linear8bitlt_no_fp16_weights(device, threshold):
     w1, w2 = mlp.fc1.weight.clone().to(device), mlp.fc2.weight.clone().to(device)  # grab weights before quantization,
     mlp = mlp.to(device).half()  # and this line triggers quantization

-    for i in range(100):
+    for i in range(4):
         b1 = torch.randn(16, 8, 32, device=device, dtype=torch.float16)
         o1 = mlp(b1)
         assert o1.dtype == torch.float16
...
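The loop bodies are unchanged; only the iteration count drops from 100 to 4, which still covers the first-call quantization path plus a few steady-state forwards. A sketch of the pattern these loops exercise, using bitsandbytes.nn.Linear8bitLt directly (MLP8bit is a helper local to the test file; the threshold value here is illustrative):

import torch

import bitsandbytes as bnb

# Inference-only int8 layer with fp16 inputs/outputs, mirroring the test.
l1 = bnb.nn.Linear8bitLt(32, 64, threshold=6.0, has_fp16_weights=False).half().cuda()
l1.eval()

for _ in range(4):  # 4 iterations now suffice instead of 100
    b1 = torch.randn(16, 8, 32, device="cuda", dtype=torch.float16)
    o1 = l1(b1)
    assert o1.dtype == torch.float16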