Unverified commit 367856de, authored by Isotr0py, committed by GitHub
Browse files

[CI/Build] Revive skipped reward models e2e test (#31665)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent da436f86
[[[0.0006361007690429688, 0.99951171875], [0.81884765625, 0.1812744140625], [0.025543212890625, 0.974609375], [0.0004382133483886719, 0.99951171875]]]
\ No newline at end of file
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
from typing import TYPE_CHECKING
import pytest import pytest
import torch import torch
...@@ -9,7 +11,18 @@ from transformers import AutoModel ...@@ -9,7 +11,18 @@ from transformers import AutoModel
from vllm.platforms import current_platform from vllm.platforms import current_platform
from ....conftest import HfRunner from ....conftest import HfRunner
from ...utils import check_transformers_version from ....utils import VLLM_PATH
from ...registry import HF_EXAMPLE_MODELS
if TYPE_CHECKING:
from _typeshed import StrPath
# Directory holding checked-in test fixtures, resolved relative to VLLM_PATH.
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
# Evaluated at import time: the module fails to import (and therefore to
# collect) if the fixtures directory is missing.
assert FIXTURES_PATH.exists()

# Maps a model name to the JSON file holding its recorded reward outputs.
# test_prm_models writes the HF outputs to this path, and
# test_prm_models_with_golden_outputs reads them back as the reference.
FIXTURE_REWARD_RESULT = {
    "Qwen/Qwen2.5-Math-PRM-7B": FIXTURES_PATH / "qwen2_5_math_prm_reward_step.json",
}
@pytest.fixture @pytest.fixture
...@@ -60,6 +73,16 @@ def step_reward_patch_hf_model(hf_model: HfRunner): ...@@ -60,6 +73,16 @@ def step_reward_patch_hf_model(hf_model: HfRunner):
return hf_model return hf_model
def dump_reward_outputs(
    outputs: list[list[list[float]]], filename: "StrPath"
) -> None:
    """Serialize reward outputs to ``filename`` as UTF-8 encoded JSON.

    Any existing file at ``filename`` is overwritten.

    Args:
        outputs: JSON-serializable reward values. The checked-in fixture is
            nested three levels deep, with one 2-D list per prompt
            (presumably steps x class scores -- confirm against the runner).
        filename: Destination path of the JSON file.
    """
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(outputs, f)
        # End the file with a newline so the generated fixture does not show
        # up as "No newline at end of file" in diffs; json.load ignores it.
        f.write("\n")
def load_reward_outputs(filename: "StrPath") -> list[list[list[float]]]:
    """Load reward outputs previously stored as JSON in ``filename``.

    Args:
        filename: Path of a UTF-8 encoded JSON file.

    Returns:
        The parsed JSON content. For the checked-in fixture this is a list
        holding one 2-D list of floats per prompt.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, encoding="utf-8") as f:
        return json.load(f)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model", "model",
[ [
...@@ -77,9 +100,8 @@ def test_prm_models( ...@@ -77,9 +100,8 @@ def test_prm_models(
model: str, model: str,
dtype: str, dtype: str,
) -> None: ) -> None:
check_transformers_version( model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
"Qwen/Qwen2.5-Math-PRM-7B", max_transformers_version="4.53.2" model_info.check_transformers_version(on_fail="skip")
)
if current_platform.is_cpu(): if current_platform.is_cpu():
pytest.skip("CPU only supports V1") pytest.skip("CPU only supports V1")
...@@ -91,9 +113,46 @@ def test_prm_models( ...@@ -91,9 +113,46 @@ def test_prm_models(
hf_model = step_reward_patch_hf_model(hf_model) hf_model = step_reward_patch_hf_model(hf_model)
hf_outputs = hf_model.reward(math_step_prompts) hf_outputs = hf_model.reward(math_step_prompts)
dump_reward_outputs(
hf_outputs,
FIXTURE_REWARD_RESULT[model],
)
# check logits difference # check logits difference
for hf_output, vllm_output in zip(hf_outputs, vllm_outputs): for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
hf_output = torch.tensor(hf_output).float() hf_output = torch.tensor(hf_output).float()
vllm_output = torch.tensor(vllm_output).float() vllm_output = torch.tensor(vllm_output).float()
assert torch.allclose(hf_output, vllm_output, 1.5e-2) assert torch.allclose(hf_output, vllm_output, 1.5e-2)
@pytest.mark.parametrize(
    "model",
    [
        pytest.param(
            "Qwen/Qwen2.5-Math-PRM-7B",
            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
        ),
    ],
)
@pytest.mark.parametrize("dtype", ["half"])
def test_prm_models_with_golden_outputs(
    vllm_runner,
    math_step_prompts,
    model: str,
    dtype: str,
) -> None:
    """Compare vLLM reward outputs against the recorded golden outputs.

    The reference values are read from the JSON fixture registered for
    ``model`` in ``FIXTURE_REWARD_RESULT``; the test is skipped when no
    fixture is registered.
    """
    golden_path = FIXTURE_REWARD_RESULT.get(model)
    if not golden_path:
        pytest.skip(f"No available golden outputs for {model}.")

    with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.reward(math_step_prompts)

    golden_outputs = load_reward_outputs(golden_path)

    # zip() stops at the shorter input, so a stale or truncated fixture would
    # otherwise let the test pass while comparing only part of the outputs.
    assert len(golden_outputs) == len(vllm_outputs), (
        f"Expected {len(golden_outputs)} reward outputs, "
        f"got {len(vllm_outputs)}."
    )

    # check logits difference
    for golden_output, vllm_output in zip(golden_outputs, vllm_outputs):
        golden_output = torch.tensor(golden_output).float()
        vllm_output = torch.tensor(vllm_output).float()
        # allclose broadcasts its operands, so check the shapes explicitly.
        assert golden_output.shape == vllm_output.shape
        assert torch.allclose(golden_output, vllm_output, rtol=1.5e-2)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment