Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0c3543d7
Unverified
Commit
0c3543d7
authored
Nov 02, 2025
by
Yineng Zhang
Committed by
GitHub
Nov 02, 2025
Browse files
chore: upgrade flashinfer 0.5.0 (#12523)
Co-authored-by:
Baizhou Zhang
<
sobereddiezhang@gmail.com
>
parent
6a3b9fd0
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
12 additions
and
5 deletions
+12
-5
python/pyproject.toml
python/pyproject.toml
+3
-1
python/sglang/check_env.py
python/sglang/check_env.py
+2
-0
python/sglang/srt/entrypoints/engine.py
python/sglang/srt/entrypoints/engine.py
+1
-1
python/sglang/srt/utils/common.py
python/sglang/srt/utils/common.py
+3
-1
scripts/ci/ci_install_dependency.sh
scripts/ci/ci_install_dependency.sh
+2
-1
sgl-kernel/build.sh
sgl-kernel/build.sh
+1
-1
No files found.
python/pyproject.toml
View file @
0c3543d7
...
@@ -26,7 +26,9 @@ dependencies = [
...
@@ -26,7 +26,9 @@ dependencies = [
"datasets"
,
"datasets"
,
"einops"
,
"einops"
,
"fastapi"
,
"fastapi"
,
"flashinfer_python==0.4.1"
,
"flashinfer_python==0.5.0"
,
"flashinfer_cubin==0.5.0"
,
"flashinfer_jit_cache==0.5.0"
,
"gguf"
,
"gguf"
,
"hf_transfer"
,
"hf_transfer"
,
"huggingface_hub"
,
"huggingface_hub"
,
...
...
python/sglang/check_env.py
View file @
0c3543d7
...
@@ -22,6 +22,8 @@ PACKAGE_LIST = [
...
@@ -22,6 +22,8 @@ PACKAGE_LIST = [
"sglang"
,
"sglang"
,
"sgl_kernel"
,
"sgl_kernel"
,
"flashinfer_python"
,
"flashinfer_python"
,
"flashinfer_cubin"
,
"flashinfer_jit_cache"
,
"triton"
,
"triton"
,
"transformers"
,
"transformers"
,
"torchao"
,
"torchao"
,
...
...
python/sglang/srt/entrypoints/engine.py
View file @
0c3543d7
...
@@ -712,7 +712,7 @@ def _set_envs_and_config(server_args: ServerArgs):
...
@@ -712,7 +712,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if
server_args
.
attention_backend
==
"flashinfer"
:
if
server_args
.
attention_backend
==
"flashinfer"
:
assert_pkg_version
(
assert_pkg_version
(
"flashinfer_python"
,
"flashinfer_python"
,
"0.
4.1
"
,
"0.
5.0
"
,
"Please uninstall the old version and "
"Please uninstall the old version and "
"reinstall the latest version by following the instructions "
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html."
,
"at https://docs.flashinfer.ai/installation.html."
,
...
...
python/sglang/srt/utils/common.py
View file @
0c3543d7
...
@@ -2386,7 +2386,9 @@ def set_cuda_arch():
...
@@ -2386,7 +2386,9 @@ def set_cuda_arch():
if
is_flashinfer_available
():
if
is_flashinfer_available
():
capability
=
torch
.
cuda
.
get_device_capability
()
capability
=
torch
.
cuda
.
get_device_capability
()
arch
=
f
"
{
capability
[
0
]
}
.
{
capability
[
1
]
}
"
arch
=
f
"
{
capability
[
0
]
}
.
{
capability
[
1
]
}
"
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
f
"
{
arch
}{
'+PTX'
if
arch
==
'9.0'
else
''
}
"
os
.
environ
[
"FLASHINFER_CUDA_ARCH_LIST"
]
=
(
f
"
{
arch
}{
'a'
if
capability
[
0
]
>=
9
else
''
}
"
)
def
next_power_of_2
(
n
:
int
):
def
next_power_of_2
(
n
:
int
):
...
...
scripts/ci/ci_install_dependency.sh
View file @
0c3543d7
...
@@ -23,6 +23,7 @@ echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
...
@@ -23,6 +23,7 @@ echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
# Clear torch compilation cache
# Clear torch compilation cache
python3
-c
'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
python3
-c
'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
rm
-rf
/root/.cache/flashinfer
rm
-rf
/root/.cache/flashinfer
pip3 uninstall flashinfer-python flashinfer-cubin flashinfer-jit-cache
||
true
# Install apt packages
# Install apt packages
apt
install
-y
git libnuma-dev libssl-dev pkg-config
apt
install
-y
git libnuma-dev libssl-dev pkg-config
...
@@ -93,7 +94,7 @@ else
...
@@ -93,7 +94,7 @@ else
fi
fi
# Install the main package
# Install the main package
$PIP_CMD
install
-e
"python[dev]"
--extra-index-url
https://download.pytorch.org/whl/
${
CU_VERSION
}
$PIP_INSTALL_SUFFIX
$PIP_CMD
install
-e
"python[dev]"
--extra-index-url
https://download.pytorch.org/whl/
${
CU_VERSION
}
--extra-index-url
https://flashinfer.ai/whl/
${
CU_VERSION
}
$PIP_INSTALL_SUFFIX
# Install router for pd-disagg test
# Install router for pd-disagg test
$PIP_CMD
install
sglang-router
$PIP_INSTALL_SUFFIX
$PIP_CMD
install
sglang-router
$PIP_INSTALL_SUFFIX
...
...
sgl-kernel/build.sh
View file @
0c3543d7
...
@@ -147,7 +147,7 @@ docker run --rm \
...
@@ -147,7 +147,7 @@ docker run --rm \
ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so &&
\
ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so &&
\
${
PYTHON_ROOT_PATH
}
/bin/
${
TORCH_INSTALL
}
&&
\
${
PYTHON_ROOT_PATH
}
/bin/
${
TORCH_INSTALL
}
&&
\
${
PYTHON_ROOT_PATH
}
/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core &&
\
${
PYTHON_ROOT_PATH
}
/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core &&
\
export
TORCH
_CUDA_ARCH_LIST='8.0 8.9 9.0
+PTX
' &&
\
export
FLASHINFER
_CUDA_ARCH_LIST='8.0 8.9 9.0
a 10.0a 12.0a
' &&
\
export CUDA_VERSION=
${
CUDA_VERSION
}
&&
\
export CUDA_VERSION=
${
CUDA_VERSION
}
&&
\
mkdir -p /usr/lib/
${
ARCH
}
-linux-gnu/ &&
\
mkdir -p /usr/lib/
${
ARCH
}
-linux-gnu/ &&
\
ln -s /usr/local/cuda-
${
CUDA_VERSION
}
/targets/
${
LIBCUDA_ARCH
}
-linux/lib/stubs/libcuda.so /usr/lib/
${
ARCH
}
-linux-gnu/libcuda.so &&
\
ln -s /usr/local/cuda-
${
CUDA_VERSION
}
/targets/
${
LIBCUDA_ARCH
}
-linux/lib/stubs/libcuda.so /usr/lib/
${
ARCH
}
-linux-gnu/libcuda.so &&
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment