Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
77f77a95
Unverified
Commit
77f77a95
authored
Jul 10, 2025
by
Isotr0py
Committed by
GitHub
Jul 10, 2025
Browse files
[Misc] Clean up mark to fork process in BNB tests (#20692)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
1a4f35e2
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
18 deletions
+11
-18
tests/models/quantization/test_bitsandbytes.py
tests/models/quantization/test_bitsandbytes.py
+11
-18
No files found.
tests/quantization/test_bitsandbytes.py
→
tests/
models/
quantization/test_bitsandbytes.py
View file @
77f77a95
...
...
@@ -13,8 +13,8 @@ from transformers import BitsAndBytesConfig
from
tests.quantization.utils
import
is_quant_method_supported
from
..
models
.utils
import
c
heck_embeddings_close
from
..utils
import
c
ompare_two_settings
,
create_new_process_for_each_test
from
...utils
import
c
ompare_two_settings
,
multi_gpu_test
from
..utils
import
c
heck_embeddings_close
models_4bit_to_test
=
[
(
"facebook/opt-125m"
,
"quantize opt model inflight"
),
...
...
@@ -42,7 +42,6 @@ models_pre_quant_8bit_to_test = [
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"bitsandbytes"
),
reason
=
'bitsandbytes is not supported on this GPU type.'
)
@
pytest
.
mark
.
parametrize
(
"model_name, description"
,
models_4bit_to_test
)
@
create_new_process_for_each_test
()
def
test_load_4bit_bnb_model
(
hf_runner
,
vllm_runner
,
example_prompts
,
model_name
,
description
)
->
None
:
...
...
@@ -56,7 +55,6 @@ def test_load_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
reason
=
'bitsandbytes is not supported on this GPU type.'
)
@
pytest
.
mark
.
parametrize
(
"model_name, description"
,
models_pre_qaunt_4bit_to_test
)
@
create_new_process_for_each_test
()
def
test_load_pre_quant_4bit_bnb_model
(
hf_runner
,
vllm_runner
,
example_prompts
,
model_name
,
description
)
->
None
:
...
...
@@ -68,7 +66,6 @@ def test_load_pre_quant_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
reason
=
'bitsandbytes is not supported on this GPU type.'
)
@
pytest
.
mark
.
parametrize
(
"model_name, description"
,
models_pre_quant_8bit_to_test
)
@
create_new_process_for_each_test
()
def
test_load_8bit_bnb_model
(
hf_runner
,
vllm_runner
,
example_prompts
,
model_name
,
description
)
->
None
:
...
...
@@ -76,12 +73,10 @@ def test_load_8bit_bnb_model(hf_runner, vllm_runner, example_prompts,
model_name
,
True
)
@
pytest
.
mark
.
skipif
(
torch
.
cuda
.
device_count
()
<
2
,
reason
=
'Test requires at least 2 GPUs.'
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"bitsandbytes"
),
reason
=
'bitsandbytes is not supported on this GPU type.'
)
@
pytest
.
mark
.
parametrize
(
"model_name, description"
,
models_4bit_to_test
)
@
create_new_process_for_each_test
(
)
@
multi_gpu_test
(
num_gpus
=
2
)
def
test_load_tp_4bit_bnb_model
(
hf_runner
,
vllm_runner
,
example_prompts
,
model_name
,
description
)
->
None
:
...
...
@@ -96,12 +91,10 @@ def test_load_tp_4bit_bnb_model(hf_runner, vllm_runner, example_prompts,
vllm_tp_size
=
2
)
@
pytest
.
mark
.
skipif
(
torch
.
cuda
.
device_count
()
<
2
,
reason
=
'Test requires at least 2 GPUs.'
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"bitsandbytes"
),
reason
=
'bitsandbytes is not supported on this GPU type.'
)
@
pytest
.
mark
.
parametrize
(
"model_name, description"
,
models_4bit_to_test
)
@
create_new_process_for_each_test
(
)
@
multi_gpu_test
(
num_gpus
=
2
)
def
test_load_pp_4bit_bnb_model
(
model_name
,
description
)
->
None
:
common_args
=
[
"--disable-log-stats"
,
...
...
@@ -127,7 +120,6 @@ def test_load_pp_4bit_bnb_model(model_name, description) -> None:
@
pytest
.
mark
.
parametrize
(
"model_name, description"
,
models_4bit_to_embedding_test
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
"half"
])
@
create_new_process_for_each_test
()
def
test_4bit_bnb_embedding_model
(
model_name
,
description
,
...
...
@@ -146,6 +138,13 @@ def test_4bit_bnb_embedding_model(
example_prompts
=
[
str
(
s
).
strip
()
for
s
in
example_prompts
]
# Inflight 4bit quantization
with
vllm_runner
(
model_name
,
task
=
"embed"
,
dtype
=
dtype
,
gpu_memory_utilization
=
0.5
,
quantization
=
"bitsandbytes"
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
embed
(
example_prompts
)
hf_model_kwargs
=
dict
(
quantization_config
=
BitsAndBytesConfig
(
load_in_4bit
=
True
))
with
hf_runner
(
...
...
@@ -156,12 +155,6 @@ def test_4bit_bnb_embedding_model(
)
as
hf_model
:
hf_outputs
=
hf_model
.
encode
(
example_prompts
)
with
vllm_runner
(
model_name
,
task
=
"embed"
,
dtype
=
dtype
,
gpu_memory_utilization
=
0.5
,
quantization
=
"bitsandbytes"
)
as
vllm_model
:
vllm_outputs
=
vllm_model
.
embed
(
example_prompts
)
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
embeddings_1_lst
=
vllm_outputs
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment