Unverified commit 7920e9b1, authored by Isotr0py and committed by GitHub
Browse files

[Bugfix] Fix failing GPT-OSS initialization test (#22557)


Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent b7c0942b
...@@ -200,7 +200,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { ...@@ -200,7 +200,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
{"6b": "EleutherAI/gpt-j-6b"}), {"6b": "EleutherAI/gpt-j-6b"}),
"GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m", "GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m",
{"1b": "EleutherAI/pythia-1.4b"}), {"1b": "EleutherAI/pythia-1.4b"}),
"GptOssForCausalLM": _HfExamplesInfo("openai/gpt-oss-20b"), "GptOssForCausalLM": _HfExamplesInfo("lmsys/gpt-oss-20b-bf16"),
"GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"), "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
"GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"), "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
"GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"), # noqa: E501 "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"), # noqa: E501
......
...@@ -68,6 +68,11 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch, ...@@ -68,6 +68,11 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
if model_arch == "Phi4FlashForCausalLM": if model_arch == "Phi4FlashForCausalLM":
# Phi4FlashForCausalLM only supports DIFFERENTIAL_FLASH_ATTN backend # Phi4FlashForCausalLM only supports DIFFERENTIAL_FLASH_ATTN backend
m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN") m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN")
if model_arch == "GptOssForCausalLM":
# FIXME: A hack to bypass the FA3 assertion because our CI's L4 GPU
# has cc==8.9, which doesn't support FA3 yet. Remove this hack when
# L4 supports FA3.
m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
LLM( LLM(
model_info.default, model_info.default,
tokenizer=model_info.tokenizer, tokenizer=model_info.tokenizer,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment