Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2ea8bd27
Commit
2ea8bd27
authored
Jun 05, 2025
by
zhuwenwen
Browse files
[test] update mq_llm_engine
parent
fe306013
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
41 additions
and
40 deletions
+41
-40
tests/models/test_transformers.py
tests/models/test_transformers.py
+39
-38
tests/mq_llm_engine/test_error_handling.py
tests/mq_llm_engine/test_error_handling.py
+2
-2
tests/multi_step/untest_correctness_llm.py
tests/multi_step/untest_correctness_llm.py
+0
-0
No files found.
tests/models/test_transformers.py
View file @
2ea8bd27
...
...
@@ -3,10 +3,11 @@
Run `pytest tests/models/test_transformers.py`.
"""
import
os
import
pytest
from
..conftest
import
HfRunner
,
VllmRunner
from
..utils
import
multi_gpu_test
from
..utils
import
multi_gpu_test
,
models_path_prefix
from
.utils
import
check_logprobs_close
...
...
@@ -67,40 +68,40 @@ def test_distributed(
"meta-llama/Llama-3.2-1B-Instruct"
,
**
kwargs
)
@
pytest
.
mark
.
parametrize
(
"model, quantization_kwargs"
,
[
(
"meta-llama/Llama-3.2-1B-Instruct"
,
{
"quantization"
:
"bitsandbytes"
,
},
),
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_quantization
(
vllm_runner
:
type
[
VllmRunner
],
example_prompts
:
list
[
str
],
model
:
str
,
quantization_kwargs
:
dict
[
str
,
str
],
max_tokens
:
int
,
num_logprobs
:
int
,
)
->
None
:
with
vllm_runner
(
model
,
model_impl
=
"auto"
,
enforce_eager
=
True
,
**
quantization_kwargs
)
as
vllm_model
:
# type: ignore[arg-type]
vllm_outputs
=
vllm_model
.
generate_greedy_logprobs
(
example_prompts
,
max_tokens
=
max_tokens
,
num_logprobs
=
num_logprobs
)
#
@pytest.mark.parametrize("model, quantization_kwargs", [
#
(
#
os.path.join(models_path_prefix,
"meta-llama/Llama-3.2-1B-Instruct"
)
,
#
{
#
"quantization": "bitsandbytes",
#
},
#
),
#
])
#
@pytest.mark.parametrize("max_tokens", [32])
#
@pytest.mark.parametrize("num_logprobs", [5])
#
def test_quantization(
#
vllm_runner: type[VllmRunner],
#
example_prompts: list[str],
#
model: str,
#
quantization_kwargs: dict[str, str],
#
max_tokens: int,
#
num_logprobs: int,
#
) -> None:
#
with vllm_runner(
#
model, model_impl="auto", enforce_eager=True,
#
**quantization_kwargs) as vllm_model: # type: ignore[arg-type]
#
vllm_outputs = vllm_model.generate_greedy_logprobs(
#
example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs)
with
vllm_runner
(
model
,
model_impl
=
"transformers"
,
enforce_eager
=
True
,
**
quantization_kwargs
)
as
vllm_model
:
# type: ignore[arg-type]
transformers_outputs
=
vllm_model
.
generate_greedy_logprobs
(
example_prompts
,
max_tokens
=
max_tokens
,
num_logprobs
=
num_logprobs
)
check_logprobs_close
(
outputs_0_lst
=
transformers_outputs
,
outputs_1_lst
=
vllm_outputs
,
name_0
=
"transformers"
,
name_1
=
"vllm"
,
)
#
with vllm_runner(
#
model,
#
model_impl="transformers",
#
enforce_eager=True,
#
**quantization_kwargs) as vllm_model: # type: ignore[arg-type]
#
transformers_outputs = vllm_model.generate_greedy_logprobs(
#
example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs)
#
check_logprobs_close(
#
outputs_0_lst=transformers_outputs,
#
outputs_1_lst=vllm_outputs,
#
name_0="transformers",
#
name_1="vllm",
#
)
\ No newline at end of file
tests/mq_llm_engine/test_error_handling.py
View file @
2ea8bd27
...
...
@@ -256,7 +256,7 @@ async def test_mp_crash_detection(monkeypatch: pytest.MonkeyPatch):
pass
end
=
time
.
perf_counter
()
assert
end
-
start
<
60
,
(
assert
end
-
start
<
120
,
(
"Expected vLLM to gracefully shutdown in <60s "
"if there is an error in the startup."
)
...
...
tests/multi_step/test_correctness_llm.py
→
tests/multi_step/untest_correctness_llm.py
View file @
2ea8bd27
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment