Unverified commit 925dbb32, authored by Zaili Wang and committed by GitHub
Browse files

[CPU] fix CPU backend sel. issue for Llama4 (#10511)

parent 8df7353a
...@@ -81,6 +81,8 @@ git clone https://github.com/sgl-project/sglang.git ...@@ -81,6 +81,8 @@ git clone https://github.com/sgl-project/sglang.git
cd sglang cd sglang
git checkout <YOUR-DESIRED-VERSION> git checkout <YOUR-DESIRED-VERSION>
# Use dedicated toml file
cp python/pyproject_other.toml python/pyproject.toml
# Install SGLang dependent libs, and build SGLang main package # Install SGLang dependent libs, and build SGLang main package
pip install --upgrade pip setuptools pip install --upgrade pip setuptools
conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl
......
...@@ -2648,7 +2648,7 @@ class ServerArgs: ...@@ -2648,7 +2648,7 @@ class ServerArgs:
# use bf16 for mxfp4 triton kernels # use bf16 for mxfp4 triton kernels
self.dtype = "bfloat16" self.dtype = "bfloat16"
elif "Llama4" in model_arch: elif "Llama4" in model_arch and self.device != "cpu":
assert self.attention_backend in { assert self.attention_backend in {
"fa3", "fa3",
"aiter", "aiter",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment