Unverified Commit 29564ad6 authored by Matthew Douglas, committed by GitHub

CI: Setup HPU nightly tests (#1681)

* Setup XPU CI

* CI: expand XPU matrix

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* skip some fp4 tests on hpu

* skip some fp4 tests on hpu

* skip gemv tests on hpu

* test

* Additional test patches for HPU

* HPU test update

* HPU test update

* HPU test update

* HPU test update

* Format
parent 70bbbb92
@@ -222,6 +222,133 @@ jobs:
# - name: Show pip packages
# run: pip list
test-hpu:
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
needs: build-cpu
strategy:
fail-fast: false
matrix:
torch_version: ["2.6.0"]
runs-on:
group: bandb-itac-bmemr-gaudi3-1gaudi
env:
BNB_TEST_DEVICE: hpu
container:
image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
BNB_TEST_DEVICE: hpu
steps:
- name: Show system information
run: |
echo "OS: $(uname -a)"
echo "CPU: $(lscpu | grep 'Model name')"
echo "Memory: $(free -h)"
- name: Show HPU Information
run: |
hl-smi
- uses: actions/checkout@v4
- name: Download build artifact
uses: actions/download-artifact@v4
with:
name: lib_cpu_ubuntu-22.04_x86_64
path: bitsandbytes/
merge-multiple: true
- name: Show installed packages
run: pip list
- name: Install dependencies
run: |
pip install -e ".[test]"
pip install pytest-cov
- name: Show installed packages
run: pip list
- name: Show environment information
run: |
python -m torch.utils.collect_env
python -m bitsandbytes
- name: Run tests
run: pytest --durations=100
test-xpu:
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
needs: build-cpu
strategy:
fail-fast: false
matrix:
torch_version: ["2.7.1"] #["2.6.0", "2.7.1"]
ipex: [false]
# ipex: [true, false]
# include:
# - torch_version: "2.6.0"
# ipex: true
# ipex_version: "2.6.10+xpu"
# - torch_version: "2.7.1"
# ipex: true
# ipex_version: "2.7.10+xpu"
runs-on:
group: bandb-itac-bmsprpvc1550-8-1gpu
env:
BNB_TEST_DEVICE: xpu
steps:
- name: Show system information
run: |
echo "OS: $(uname -a)"
echo "CPU: $(lscpu | grep 'Model name')"
echo "Memory: $(free -h)"
- name: Show XPU Information
run: |
xpu-smi discovery
sudo xpu-smi discovery
sudo apt-get install -y hwinfo
hwinfo --display
- uses: actions/checkout@v4
- name: Download build artifact
uses: actions/download-artifact@v4
with:
name: lib_cpu_ubuntu-22.04_x86_64
path: bitsandbytes/
merge-multiple: true
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Install PyTorch
run: pip install torch==${{ matrix.torch_version }} --index-url https://download.pytorch.org/whl/xpu
- name: Install IPEX
if: matrix.ipex == true
run: pip install intel_extension_for_pytorch==${{ matrix.ipex_version }} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
- name: Install dependencies
run: |
pip install -e ".[test]"
pip install pytest-cov
- name: Show installed packages
run: pip list
- name: Show environment information
run: |
python -m torch.utils.collect_env
python -m bitsandbytes
# - name: Run tests
# run: pytest --durations=100
test-cuda:
if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
needs: build-cuda
......
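Both new jobs export BNB_TEST_DEVICE so that the test suite targets the intended accelerator. A rough sketch of how such an override could gate the device list is below; the real helper lives in tests/helpers.py and may differ.

```python
import os

import torch


def get_test_devices() -> list[str]:
    """Hypothetical sketch: honor a BNB_TEST_DEVICE override, otherwise probe what is available."""
    override = os.environ.get("BNB_TEST_DEVICE")
    if override:
        return [override]
    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        devices.append("xpu")
    if hasattr(torch, "hpu") and torch.hpu.is_available():
        devices.append("hpu")
    return devices
```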
@@ -4,6 +4,7 @@
# LICENSE file in the root directory of this source tree.
import importlib
import sys
import torch
@@ -37,8 +38,13 @@ if torch.cuda.is_available():
if hasattr(torch, "xpu") and torch.xpu.is_available():
from .backends.xpu import ops as xpu_ops
if hasattr(torch, "hpu") and torch.hpu.is_available():
from .backends.hpu import ops as hpu_ops
if importlib.util.find_spec("habana_frameworks") and importlib.util.find_spec("habana_frameworks.torch"):
# In case not automatically imported
import habana_frameworks.torch
if hasattr(torch, "hpu") and torch.hpu.is_available():
from .backends.hpu import ops as hpu_ops
def _import_backends():
......
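The guarded import above matters because torch.hpu is only registered once habana_frameworks.torch has been imported. A minimal standalone sketch of the same detection pattern, assuming the Habana PyTorch bridge is installed on Gaudi hosts and falling back to CPU elsewhere:

```python
import importlib.util

import torch

# torch.hpu only exists after habana_frameworks.torch is imported, so probe for the
# package first (parent before submodule, to avoid ModuleNotFoundError) and import it.
if importlib.util.find_spec("habana_frameworks") and importlib.util.find_spec("habana_frameworks.torch"):
    import habana_frameworks.torch  # noqa: F401

if hasattr(torch, "hpu") and torch.hpu.is_available():
    device = torch.device("hpu")
else:
    device = torch.device("cpu")

print(f"running on {device}")
```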
@@ -234,6 +234,9 @@ def test_matmul_4bit(
out_bnb.data.copy_(out_torch)
if device == "cuda":
torch.cuda.synchronize()
elif device == "hpu":
torch.hpu.synchronize()
loss_bnb = torch.nn.functional.mse_loss(out_bnb, target).mean()
loss_bnb.backward()
gradA1 = A.grad
......
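The test change above adds an HPU branch next to the existing CUDA synchronization. A small sketch of a device-agnostic helper in the same spirit; the `synchronize` name is illustrative and not part of the test suite.

```python
import torch


def synchronize(device: str) -> None:
    """Hypothetical helper: block until pending kernels on the given device finish."""
    if device == "cuda":
        torch.cuda.synchronize()
    elif device == "xpu":
        torch.xpu.synchronize()
    elif device == "hpu":
        # torch.hpu is registered by habana_frameworks.torch on Gaudi systems.
        torch.hpu.synchronize()
    # CPU ops are synchronous, so there is nothing to do otherwise.
```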
@@ -257,7 +257,8 @@ def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
ref_output = net(x)
# Compile the model
-    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode)
+    compile_backend = "hpu_backend" if device == "hpu" else "inductor"
+    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode, backend=compile_backend)
# Get output from compiled model
with torch.no_grad():
......
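On Gaudi, torch.compile goes through the Habana bridge's "hpu_backend" rather than Inductor, which is what the test change above selects. A minimal sketch under that assumption, falling back to CPU and Inductor when no HPU is present:

```python
import torch
from torch import nn

# Pick the torch.compile backend per device: the Habana bridge registers "hpu_backend",
# while CUDA/XPU/CPU use the default "inductor" backend.
device = "hpu" if hasattr(torch, "hpu") and torch.hpu.is_available() else "cpu"
compile_backend = "hpu_backend" if device == "hpu" else "inductor"

net = nn.Sequential(nn.Linear(32, 32), nn.ReLU()).to(device)
compiled_net = torch.compile(net, backend=compile_backend)

with torch.no_grad():
    out = compiled_net(torch.randn(4, 32, device=device))
```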
@@ -5,7 +5,7 @@ import torch
from torch import nn
import bitsandbytes as bnb
-from tests.helpers import get_available_devices, id_formatter
+from tests.helpers import get_available_devices, id_formatter, is_supported_on_hpu
class MockArgs:
@@ -276,9 +276,9 @@ module_dict = {
"NF4": bnb.nn.LinearNF4,
"FP4+C": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compress_statistics=True),
"NF4+C": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compress_statistics=True),
"NF4+fp32": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float32),
"NF4+fp16": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16),
"NF4+bf16": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16),
"NF4+fp32": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.float32),
"NF4+fp16": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.float16),
"NF4+bf16": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.bfloat16),
}
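The fix above points the "NF4+*" entries at LinearNF4 instead of LinearFP4, so the compute-dtype variants actually exercise NF4. A short usage sketch of the corrected constructors; the layer sizes are illustrative.

```python
import torch

import bitsandbytes as bnb

# Construct the NF4 linear layers the corrected module_dict entries describe;
# compute_dtype controls the dtype the inputs are cast to for the matmul.
nf4_fp32 = bnb.nn.LinearNF4(64, 64, compute_dtype=torch.float32)
nf4_bf16 = bnb.nn.LinearNF4(64, 64, compute_dtype=torch.bfloat16)

# The quantization type is carried on the 4-bit weight parameter.
assert nf4_fp32.weight.quant_type == "nf4"
```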
@@ -295,7 +295,12 @@ def test_kbit_backprop(device, module):
torch.nn.init.kaiming_normal_(ref[0].weight)
torch.nn.init.kaiming_normal_(ref[1].weight)
ref[1].weight.requires_grad_(False)
kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])
if device == "hpu" and isinstance(kbit[1], bnb.nn.Linear4bit) and kbit[1].weight.quant_type == "fp4":
pytest.skip("FP4 is not supported on HPU")
kbit[0].weight.detach().copy_(ref[0].weight)
kbit[1].weight.detach().copy_(ref[1].weight)
kbit[0].bias.detach().copy_(ref[0].bias)
@@ -358,6 +363,12 @@ def test_kbit_backprop(device, module):
ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
)
def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim, quant_storage):
if device == "hpu":
if embedding_class is bnb.nn.EmbeddingFP4:
pytest.skip("FP4 is not supported on HPU")
elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
pytest.skip("This configuration is not supported on HPU")
num_embeddings = 128
src_weight = (torch.randn((num_embeddings, embedding_dim), dtype=torch.float32) > 0).to(
@@ -403,6 +414,12 @@ def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim,
ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
)
def test_embedding_error(device, embedding_class, input_shape, embedding_dim, quant_storage):
if device == "hpu":
if embedding_class is bnb.nn.EmbeddingFP4:
pytest.skip("FP4 is not supported on HPU")
elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
pytest.skip("This configuration is not supported on HPU")
is_8bit = embedding_class is bnb.nn.Embedding8bit
num_embeddings = 128
......