[CPU] Fix CPU backend selection issue for Llama4 (#10511)

925dbb32 · Zaili Wang · GitHub · 8df7353a · 925dbb32 · 925dbb32
Unverified Commit 925dbb32 authored Sep 16, 2025 by Zaili Wang Committed by GitHub Sep 16, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

docs/platforms/cpu_server.md docs/platforms/cpu_server.md +2 -0

python/sglang/srt/server_args.py python/sglang/srt/server_args.py +1 -1

No files found.
--- a/docs/platforms/cpu_server.md
+++ b/docs/platforms/cpu_server.md
@@ -81,6 +81,8 @@ git clone https://github.com/sgl-project/sglang.git
 cd sglang
 git checkout <YOUR-DESIRED-VERSION>
+# Use the dedicated toml file (pyproject_other.toml) as pyproject.toml
+cp python/pyproject_other.toml python/pyproject.toml
 # Install SGLang dependent libs, and build SGLang main package
 pip install --upgrade pip setuptools
 conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl

--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -2648,7 +2648,7 @@ class ServerArgs:
                # use bf16 for mxfp4 triton kernels
                self.dtype = "bfloat16"
-        elif "Llama4" in model_arch:
+        elif "Llama4" in model_arch and self.device != "cpu":
            assert self.attention_backend in {
                "fa3",
                "aiter",