OpenDAS / bitsandbytes · Commits

Commit 70bbbb92 (unverified)
Authored Jun 16, 2025 by Chetan Kumar Verma; committed by GitHub on Jun 16, 2025
Parent: d863adb2

HPU support for unit tests (#1680)

Showing 6 changed files with 63 additions and 11 deletions (+63, -11):
bitsandbytes/backends/hpu/ops.py    +0  -5
tests/helpers.py                    +11 -0
tests/test_autograd.py              +4  -0
tests/test_functional.py            +16 -2
tests/test_linear4bit.py            +22 -3
tests/test_ops.py                   +10 -1
bitsandbytes/backends/hpu/ops.py

@@ -29,8 +29,6 @@ def _(
     if A.dtype != torch.uint8:
         A = A.view(torch.uint8)
-    transpose = False if len(A.shape) == 2 and A.shape[0] == 1 else True
     A = A.reshape(-1)
     if GAUDI_SW_VER and (GAUDI_SW_VER.major < 1 or GAUDI_SW_VER.minor < 22):

@@ -47,7 +45,4 @@ def _(
     output = out_dq.reshape(shape)
-    if transpose:
-        output = output.t()
     return output
tests/helpers.py

@@ -98,3 +98,14 @@ DTYPE_NAMES = {
 def describe_dtype(dtype: torch.dtype) -> str:
     return DTYPE_NAMES.get(dtype) or str(dtype).rpartition(".")[2]


+def is_supported_on_hpu(
+    quant_type: str = "nf4",
+    dtype: torch.dtype = torch.bfloat16,
+    quant_storage: torch.dtype = torch.uint8,
+) -> bool:
+    """
+    Check if the given quant_type, dtype and quant_storage are supported on HPU.
+    """
+    if quant_type == "fp4" or dtype == torch.float16 or quant_storage not in (torch.uint8, torch.bfloat16):
+        return False
+    return True
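The new helper gates the device-parametrized tests that follow: fp4, float16 compute, and quant_storage dtypes other than uint8/bfloat16 are treated as unsupported on HPU. A minimal sketch of the pattern used throughout the commit (the test name here is illustrative, not part of the commit):

import pytest
import torch

from tests.helpers import get_available_devices, is_supported_on_hpu


@pytest.mark.parametrize("device", get_available_devices())
@pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
def test_example_hpu_guard(device, quant_type):
    dtype = torch.bfloat16
    # Skip configurations the HPU backend does not implement.
    if device == "hpu" and not is_supported_on_hpu(quant_type, dtype):
        pytest.skip("This configuration is not supported on HPU.")
    # ... actual quantization checks would follow here ...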
tests/test_autograd.py

@@ -8,6 +8,7 @@ from tests.helpers import (
     describe_dtype,
     get_available_devices,
     id_formatter,
+    is_supported_on_hpu,
 )

 TRANSPOSE_VALS = [(False, True), (False, False)]

@@ -189,6 +190,9 @@ def test_matmul_4bit(
     if device == "cpu" and dtype != torch.float32 and any(req_grad) and torch.__version__ < (2, 6):
         pytest.xfail("mse_loss fp16 on CPU is not supported in torch < 2.6")

+    if device == "hpu" and not is_supported_on_hpu(quant_type, dtype):
+        pytest.skip("This configuration is not supported on HPU.")
+
     for i in range(3):
         # normal multiply
         if funcs[0] in [torch.mm, torch.matmul]:
tests/test_functional.py

@@ -16,6 +16,7 @@ from tests.helpers import (
     get_available_devices,
     get_test_dims,
     id_formatter,
+    is_supported_on_hpu,
 )

 torch.set_printoptions(precision=5, sci_mode=False, linewidth=120, edgeitems=20, threshold=10000)

@@ -1101,6 +1102,9 @@ class TestQuantize4BitFunctional:
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512, 1024, 2048, 4096])
     def test_4bit_quant(self, device, dtype, quant_type, blocksize):
+        if device == "hpu" and not is_supported_on_hpu(quant_type, dtype):
+            pytest.skip("This configuration is not supported on HPU.")
+
         A1 = torch.randn(1024, 1024, device=device, dtype=dtype)
         qa, SA = F.quantize_4bit(A1, blocksize=blocksize, quant_type=quant_type)
         A2 = F.dequantize_4bit(qa, SA, blocksize=blocksize, quant_type=quant_type)

@@ -1132,11 +1136,15 @@ class TestQuantize4BitFunctional:
     @pytest.mark.parametrize("device", get_available_devices())
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128], ids=id_formatter("blocksize"))
-    def test_4bit_compressed_stats(self, device, quant_type, blocksize):
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.float16], ids=describe_dtype)
+    def test_4bit_compressed_stats(self, device, quant_type, blocksize, dtype):
+        if device == "hpu" and not is_supported_on_hpu(quant_type, dtype):
+            pytest.skip("FP4 quantization is not supported on HPU.")
+
         errs1 = []
         errs2 = []
         for i in range(10):
-            A1 = torch.randn(1024, 1024, device=device).half()
+            A1 = torch.randn(1024, 1024, device=device, dtype=dtype)
             q2, SA2 = F.quantize_4bit(A1, blocksize=blocksize, quant_type=quant_type)
             q3, SA3 = F.quantize_4bit(A1, blocksize=blocksize, compress_statistics=True, quant_type=quant_type)
             A2 = F.dequantize_4bit(q2, SA2, quant_type=quant_type)

@@ -1205,6 +1213,9 @@ class TestQuantize4BitFunctional:
     )
     @pytest.mark.parametrize("dim", [128, 256, 512, 1024], ids=id_formatter("dim"))
     def test_gemv_4bit(self, device, dim, dtype, storage_type, quant_storage, double_quant, kind):
+        if device == "hpu" and not is_supported_on_hpu(storage_type, dtype, quant_storage):
+            pytest.skip("This configuration is not supported on HPU.")
+
         errs1 = []
         errs2 = []
         errs3 = []

@@ -1354,6 +1365,9 @@ class TestQuantize4BitFunctional:
         if device == "cpu" and dtype == torch.bfloat16 and torch.__version__ < (2, 3):
             pytest.skip("eye doe not support bfloat16 on CPU in torch < 2.3")

+        if device == "hpu" and not is_supported_on_hpu(storage_type, dtype):
+            pytest.skip("This configuration is not supported on HPU.")
+
         dims = 10
         torch.random.manual_seed(np.random.randint(0, 412424242))
         dims = get_test_dims(0, 8192, n=dims)
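The functional tests above all exercise the same blockwise quantize/dequantize round trip. A standalone sketch of that round trip, assuming a bitsandbytes build with a supported backend and using "cuda" as the device purely for illustration:

import torch
import bitsandbytes.functional as F

# Quantize a random matrix to NF4 blockwise, dequantize it, and inspect the
# round-trip error (the tests assert this error stays within tolerance).
A1 = torch.randn(1024, 1024, device="cuda", dtype=torch.bfloat16)
qa, SA = F.quantize_4bit(A1, blocksize=64, quant_type="nf4")
A2 = F.dequantize_4bit(qa, SA, blocksize=64, quant_type="nf4")
print("mean abs round-trip error:", (A1 - A2).abs().mean().item())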
tests/test_linear4bit.py

@@ -13,6 +13,7 @@ from tests.helpers import (
     describe_dtype,
     get_available_devices,
     id_formatter,
+    is_supported_on_hpu,
     torch_load_from_buffer,
     torch_save_to_buffer,
 )

@@ -27,12 +28,17 @@ storage = {
 @pytest.mark.parametrize("device", get_available_devices())
 @pytest.mark.parametrize("quant_storage", ["uint8", "float16", "bfloat16", "float32"])
+@pytest.mark.parametrize("original_dtype", [torch.float16, torch.bfloat16])
 @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias"))
 @pytest.mark.parametrize("compress_statistics", TRUE_FALSE, ids=id_formatter("compress_statistics"))
 @pytest.mark.parametrize("quant_type", ["nf4", "fp4"])
 @pytest.mark.parametrize("save_before_forward", TRUE_FALSE, ids=id_formatter("save_before_forward"))
-def test_linear_serialization(device, quant_type, compress_statistics, bias, quant_storage, save_before_forward):
-    original_dtype = torch.float16
+def test_linear_serialization(device, quant_type, original_dtype, compress_statistics, bias, quant_storage, save_before_forward):
+    if device == "hpu" and not is_supported_on_hpu(quant_type, original_dtype, storage[quant_storage]):
+        pytest.skip("This configuration is not supported on HPU.")
+
     compute_dtype = None
     layer_shape = (300, 400)

@@ -188,6 +194,9 @@ def test_linear_serialization(device, quant_type, compress_statistics, bias, qua
 @pytest.mark.parametrize("blocksize", [64, 128])
 @pytest.mark.parametrize("compress_statistics", TRUE_FALSE, ids=id_formatter("compress_statistics"))
 def test_copy_param(device, quant_type, blocksize, compress_statistics):
+    if device == "hpu" and not is_supported_on_hpu(quant_type):
+        pytest.skip("This configuration is not supported on HPU.")
+
     tensor = torch.randn(300, 400)
     param = bnb.nn.Params4bit(
         data=tensor,

@@ -207,6 +216,9 @@ def test_copy_param(device, quant_type, blocksize, compress_statistics):
 @pytest.mark.parametrize("blocksize", [64, 128])
 @pytest.mark.parametrize("compress_statistics", TRUE_FALSE, ids=id_formatter("compress_statistics"))
 def test_deepcopy_param(device, quant_type, blocksize, compress_statistics):
+    if device == "hpu" and not is_supported_on_hpu(quant_type):
+        pytest.skip("This configuration is not supported on HPU.")
+
     tensor = torch.randn(300, 400)
     param = bnb.nn.Params4bit(
         data=tensor,

@@ -233,6 +245,9 @@ def test_deepcopy_param(device, quant_type, blocksize, compress_statistics):
 @pytest.mark.parametrize("blocksize", [64, 128])
 @pytest.mark.parametrize("compress_statistics", TRUE_FALSE, ids=id_formatter("compress_statistics"))
 def test_params4bit_real_serialization(device, quant_type, blocksize, compress_statistics):
+    if device == "hpu" and not is_supported_on_hpu(quant_type):
+        pytest.skip("This configuration is not supported on HPU.")
+
     original_tensor = torch.randn(300, 400)
     original_param = bnb.nn.Params4bit(
         data=original_tensor,

@@ -270,6 +285,9 @@ def test_params4bit_real_serialization(device, quant_type, blocksize, compress_s
 @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode"))
 @pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4")
 def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_statistics, bias, fullgraph, mode):
+    if device == "hpu" and not is_supported_on_hpu(quant_type):
+        pytest.skip("This configuration is not supported on HPU.")
+
     if fullgraph and torch.__version__ < (2, 8, 0, "dev"):
         pytest.skip("fullgraph mode requires torch 2.8 or higher")

@@ -314,7 +332,8 @@ def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_st
     ref_output = net(x)

     # Compile the model
-    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode)
+    compile_backend = "hpu_backend" if device == "hpu" else "inductor"
+    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode, backend=compile_backend)

     # Get output from compiled model
     with torch.no_grad():
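The compile test now routes torch.compile through a device-appropriate backend ("hpu_backend" on Gaudi, "inductor" elsewhere). A minimal standalone sketch of that selection, using a plain nn.Linear stand-in rather than the bitsandbytes Linear4bit layer from the test:

import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(64, 64), nn.ReLU())
x = torch.randn(8, 64)
ref_output = net(x)

device = "cpu"  # would be "hpu" on a Gaudi host
# Pick the torch.compile backend based on the device, as the test now does.
compile_backend = "hpu_backend" if device == "hpu" else "inductor"
compiled_net = torch.compile(net, fullgraph=False, mode="default", backend=compile_backend)

with torch.no_grad():
    out = compiled_net(x)
torch.testing.assert_close(out, ref_output)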
tests/test_ops.py

@@ -5,7 +5,7 @@ import torch
 import bitsandbytes
 from bitsandbytes.functional import ipex_xpu
-from tests.helpers import TRUE_FALSE, get_available_devices, id_formatter
+from tests.helpers import TRUE_FALSE, get_available_devices, id_formatter, is_supported_on_hpu

 # torch.library.opcheck is only available in torch 2.4 and later.
 # When testing with older versions, we will skip it as a no-op.

@@ -158,6 +158,9 @@ class Test4bitBlockwiseQuantOps:
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_quantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
+        if device == "hpu" and not is_supported_on_hpu(quant_type, dtype, storage_dtype):
+            pytest.skip("This configuration is not supported on HPU.")
+
         A = torch.randn(1024, 1024, dtype=dtype, device=device)

         out, absmax = torch.ops.bitsandbytes.quantize_4bit.default(A, blocksize, quant_type, storage_dtype)

@@ -179,6 +182,9 @@ class Test4bitBlockwiseQuantOps:
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
+        if device == "hpu" and not is_supported_on_hpu(quant_type, dtype, storage_dtype):
+            pytest.skip("This configuration is not supported on HPU.")
+
         shape = (128, 128)
         n = prod(shape)

@@ -210,6 +216,9 @@ class Test4bitBlockwiseQuantOps:
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_gemv_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
+        if device == "hpu" and not is_supported_on_hpu(quant_type, dtype, storage_dtype):
+            pytest.skip("This configuration is not supported on HPU.")
+
         out_features = 1024
         in_features = 256
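The test_ops.py header comment above notes that torch.library.opcheck only exists in torch 2.4 and later. A sketch of the guard that comment describes (an assumption about the shim's shape, not the repo's exact implementation):

import torch

if hasattr(torch.library, "opcheck"):
    opcheck = torch.library.opcheck
else:
    # On older torch versions, fall back to a no-op so the tests still run.
    def opcheck(*args, **kwargs):
        pass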