Unverified commit 8441baad, authored by Yineng Zhang and committed by GitHub
Browse files

fix: update model runner (#5934)

parent 256c4c25
...@@ -81,7 +81,6 @@ from sglang.srt.utils import ( ...@@ -81,7 +81,6 @@ from sglang.srt.utils import (
get_available_gpu_memory, get_available_gpu_memory,
get_bool_env_var, get_bool_env_var,
init_custom_process_group, init_custom_process_group,
is_ampere_with_cuda_12_3,
is_cuda, is_cuda,
is_fa3_default_architecture, is_fa3_default_architecture,
is_flashinfer_available, is_flashinfer_available,
...@@ -264,7 +263,7 @@ class ModelRunner: ...@@ -264,7 +263,7 @@ class ModelRunner:
if not self.use_mla_backend: if not self.use_mla_backend:
# MHA architecture # MHA architecture
if ( if (
(is_ampere_with_cuda_12_3() or is_hopper_with_cuda_12_3()) is_hopper_with_cuda_12_3()
and is_no_spec_infer_or_topk_one(server_args) and is_no_spec_infer_or_topk_one(server_args)
and is_fa3_default_architecture(self.model_config.hf_config) and is_fa3_default_architecture(self.model_config.hf_config)
): ):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment