Unverified commit 2ad10292, authored by Wentao Ye and committed by GitHub
Browse files

[Bug] Fix batch invariance nvfp4 support (#39820)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent b2f749dc
...@@ -224,6 +224,7 @@ steps: ...@@ -224,6 +224,7 @@ steps:
- pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
- VLLM_TEST_MODEL=deepseek-ai/DeepSeek-V2-Lite-Chat pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[TRITON_MLA] - VLLM_TEST_MODEL=deepseek-ai/DeepSeek-V2-Lite-Chat pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[TRITON_MLA]
- VLLM_TEST_MODEL=Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[FLASH_ATTN] - VLLM_TEST_MODEL=Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 pytest -v -s v1/determinism/test_batch_invariance.py::test_v1_generation_is_deterministic_across_batch_sizes_with_needle[FLASH_ATTN]
- pytest -v -s v1/determinism/test_nvfp4_batch_invariant.py
- label: Acceptance Length Test (Large Models) # optional - label: Acceptance Length Test (Large Models) # optional
timeout_in_minutes: 25 timeout_in_minutes: 25
......
...@@ -601,7 +601,13 @@ def init_nvfp4_linear_kernel() -> NvFp4LinearKernel: ...@@ -601,7 +601,13 @@ def init_nvfp4_linear_kernel() -> NvFp4LinearKernel:
# Env-var overrides. # Env-var overrides.
force_kernel: type[NvFp4LinearKernel] | None = None force_kernel: type[NvFp4LinearKernel] | None = None
if envs.VLLM_USE_FBGEMM: if envs.VLLM_BATCH_INVARIANT:
logger.info_once(
"VLLM_BATCH_INVARIANT forces NVFP4 linear to use the "
"emulation backend for deterministic execution."
)
force_kernel = EmulationNvFp4LinearKernel
elif envs.VLLM_USE_FBGEMM:
force_kernel = FbgemmNvFp4LinearKernel force_kernel = FbgemmNvFp4LinearKernel
elif envs.VLLM_USE_NVFP4_CT_EMULATIONS: elif envs.VLLM_USE_NVFP4_CT_EMULATIONS:
force_kernel = EmulationNvFp4LinearKernel force_kernel = EmulationNvFp4LinearKernel
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment