Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
367856de
Unverified
Commit
367856de
authored
Jan 05, 2026
by
Isotr0py
Committed by
GitHub
Jan 05, 2026
Browse files
[CI/Build] Revive skipped reward models e2e test (#31665)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
da436f86
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
4 deletions
+64
-4
tests/models/fixtures/qwen2_5_math_prm_reward_step.json
tests/models/fixtures/qwen2_5_math_prm_reward_step.json
+1
-0
tests/models/language/pooling/test_reward.py
tests/models/language/pooling/test_reward.py
+63
-4
No files found.
tests/models/fixtures/qwen2_5_math_prm_reward_step.json
0 → 100644
View file @
367856de
[[[
0.0006361007690429688
,
0.99951171875
],
[
0.81884765625
,
0.1812744140625
],
[
0.025543212890625
,
0.974609375
],
[
0.0004382133483886719
,
0.99951171875
]]]
\ No newline at end of file
tests/models/language/pooling/test_reward.py
View file @
367856de
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
json
from
typing
import
TYPE_CHECKING
import
pytest
import
pytest
import
torch
import
torch
...
@@ -9,7 +11,18 @@ from transformers import AutoModel
...
@@ -9,7 +11,18 @@ from transformers import AutoModel
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
....conftest
import
HfRunner
from
....conftest
import
HfRunner
from
...utils
import
check_transformers_version
from
....utils
import
VLLM_PATH
from
...registry
import
HF_EXAMPLE_MODELS
if
TYPE_CHECKING
:
from
_typeshed
import
StrPath
# Directory that holds the JSON fixture files used by the model tests.
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
# Fail at import time if the fixture directory is missing.
assert FIXTURES_PATH.exists()

# Maps a model name to the JSON file holding its stored reward outputs.
FIXTURE_REWARD_RESULT = {
    "Qwen/Qwen2.5-Math-PRM-7B": FIXTURES_PATH / "qwen2_5_math_prm_reward_step.json",
}
@
pytest
.
fixture
@
pytest
.
fixture
...
@@ -60,6 +73,16 @@ def step_reward_patch_hf_model(hf_model: HfRunner):
...
@@ -60,6 +73,16 @@ def step_reward_patch_hf_model(hf_model: HfRunner):
return
hf_model
return
hf_model
def dump_reward_outputs(outputs: list[list[float]], filename: "StrPath"):
    """Serialize reward outputs to ``filename`` as UTF-8 encoded JSON.

    Args:
        outputs: Reward scores to persist.
        filename: Destination path; the file is opened in write mode, so
            any existing content is replaced.
    """
    with open(filename, mode="w", encoding="utf-8") as sink:
        json.dump(outputs, fp=sink)
def load_reward_outputs(filename: "StrPath") -> list[list[float]]:
    """Read reward outputs back from a UTF-8 encoded JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON content of the file.
    """
    with open(filename, encoding="utf-8") as source:
        raw_text = source.read()
    return json.loads(raw_text)
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"model"
,
"model"
,
[
[
...
@@ -77,9 +100,8 @@ def test_prm_models(
...
@@ -77,9 +100,8 @@ def test_prm_models(
model
:
str
,
model
:
str
,
dtype
:
str
,
dtype
:
str
,
)
->
None
:
)
->
None
:
check_transformers_version
(
model_info
=
HF_EXAMPLE_MODELS
.
find_hf_info
(
model
)
"Qwen/Qwen2.5-Math-PRM-7B"
,
max_transformers_version
=
"4.53.2"
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
)
if
current_platform
.
is_cpu
():
if
current_platform
.
is_cpu
():
pytest
.
skip
(
"CPU only supports V1"
)
pytest
.
skip
(
"CPU only supports V1"
)
...
@@ -91,9 +113,46 @@ def test_prm_models(
...
@@ -91,9 +113,46 @@ def test_prm_models(
hf_model
=
step_reward_patch_hf_model
(
hf_model
)
hf_model
=
step_reward_patch_hf_model
(
hf_model
)
hf_outputs
=
hf_model
.
reward
(
math_step_prompts
)
hf_outputs
=
hf_model
.
reward
(
math_step_prompts
)
dump_reward_outputs
(
hf_outputs
,
FIXTURE_REWARD_RESULT
[
model
],
)
# check logits difference
# check logits difference
for
hf_output
,
vllm_output
in
zip
(
hf_outputs
,
vllm_outputs
):
for
hf_output
,
vllm_output
in
zip
(
hf_outputs
,
vllm_outputs
):
hf_output
=
torch
.
tensor
(
hf_output
).
float
()
hf_output
=
torch
.
tensor
(
hf_output
).
float
()
vllm_output
=
torch
.
tensor
(
vllm_output
).
float
()
vllm_output
=
torch
.
tensor
(
vllm_output
).
float
()
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
1.5e-2
)
assert
torch
.
allclose
(
hf_output
,
vllm_output
,
1.5e-2
)
@pytest.mark.parametrize(
    "model",
    [
        pytest.param(
            "Qwen/Qwen2.5-Math-PRM-7B",
            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
        ),
    ],
)
@pytest.mark.parametrize("dtype", ["half"])
def test_prm_models_with_golden_outputs(
    vllm_runner,
    math_step_prompts,
    model: str,
    dtype: str,
) -> None:
    """Compare vLLM reward outputs against stored golden outputs.

    The golden outputs are read from the JSON fixture registered for
    ``model`` in ``FIXTURE_REWARD_RESULT``; the test is skipped when no
    fixture is registered.

    Args:
        vllm_runner: Fixture used as a context manager to build the vLLM
            model under test.
        math_step_prompts: Fixture providing the prompts passed to
            ``reward``.
        model: Name of the model to run.
        dtype: dtype string forwarded to ``vllm_runner``.
    """
    golden_path = FIXTURE_REWARD_RESULT.get(model)
    if not golden_path:
        pytest.skip(f"No available golden outputs for {model}.")

    with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.reward(math_step_prompts)

    golden_outputs = load_reward_outputs(golden_path)

    # zip() stops at the shorter sequence, so without this check a missing
    # (or empty) set of vLLM outputs would let the loop below pass vacuously.
    # NOTE(review): assumes reward() returns a sized sequence - confirm.
    assert len(golden_outputs) == len(vllm_outputs), (
        f"Expected {len(golden_outputs)} outputs, got {len(vllm_outputs)}."
    )

    # check logits difference
    for golden_output, vllm_output in zip(golden_outputs, vllm_outputs):
        golden_output = torch.tensor(golden_output).float()
        vllm_output = torch.tensor(vllm_output).float()

        # The third positional argument of torch.allclose is rtol; spell it
        # out so the tolerance kind is unambiguous.
        assert torch.allclose(golden_output, vllm_output, rtol=1.5e-2)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment