Unverified commit e314b084, authored by Lianmin Zheng, committed by GitHub
Browse files

[FIX] Fix the nightly CI by disabling swa mem pool for gemma2 (#8693)

parent 403566bc
...@@ -1928,6 +1928,12 @@ class ServerArgs: ...@@ -1928,6 +1928,12 @@ class ServerArgs:
if "Llama4" in model_arch: if "Llama4" in model_arch:
assert self.attention_backend == "fa3", "fa3 is required for Llama4 model" assert self.attention_backend == "fa3", "fa3 is required for Llama4 model"
if "Gemma2ForCausalLM" in model_arch:
# FIXME: https://github.com/sgl-project/sglang/pull/7367 is not compatible with gemma2 model.
# It failed at this test: https://github.com/sgl-project/sglang/actions/runs/16255155597/job/45890331952#step:4:736
logger.warning("Disable hybrid SWA memory for Gemma2ForCausalLM.")
self.disable_hybrid_swa_memory = True
# Check LoRA # Check LoRA
self.check_lora_server_args() self.check_lora_server_args()
......
...@@ -27,9 +27,6 @@ import torch.nn.functional as F ...@@ -27,9 +27,6 @@ import torch.nn.functional as F
from sglang.bench_serving import run_benchmark from sglang.bench_serving import run_benchmark
from sglang.global_config import global_config from sglang.global_config import global_config
from sglang.lang.backend.openai import OpenAI
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.lang.interpreter import ProgramState
from sglang.srt.utils import ( from sglang.srt.utils import (
get_bool_env_var, get_bool_env_var,
get_device, get_device,
...@@ -358,6 +355,9 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser): ...@@ -358,6 +355,9 @@ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
def select_sglang_backend(args: argparse.Namespace): def select_sglang_backend(args: argparse.Namespace):
from sglang.lang.backend.openai import OpenAI
from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
if args.backend.startswith("srt"): if args.backend.startswith("srt"):
if args.backend == "srt-no-parallel": if args.backend == "srt-no-parallel":
global_config.enable_parallel_encoding = False global_config.enable_parallel_encoding = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment