Unverified Commit 3fa3c22a authored by Baizhou Zhang, committed by GitHub

Fix fast decode plan for flashinfer v0.4.0rc1 and upgrade sgl-kernel 0.3.11 (#10634)


Co-authored-by: zhyncs <me@zhyncs.com>
parent 4f2055ad
@@ -85,7 +85,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
     && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
     && python3 -m flashinfer --download-cubin \
     && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
-        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.10/sgl_kernel-0.3.10+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
+        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.11/sgl_kernel-0.3.11+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
     fi
 # Download source files
...
@@ -57,12 +57,12 @@ dependencies = [
     "uvicorn",
     "uvloop",
     "xgrammar==0.1.24",
-    "sgl-kernel==0.3.10",
+    "sgl-kernel==0.3.11",
     "torch==2.8.0",
     "torchaudio==2.8.0",
     "torchvision",
     "cuda-python",
-    "flashinfer_python==0.3.1",
+    "flashinfer_python==0.4.0rc1",
     "openai==1.99.1",
     "tiktoken",
     "anthropic>=0.20.0",
...
@@ -65,7 +65,7 @@ tracing = [
 srt = [
     "sglang[runtime_common]",
-    "sgl-kernel==0.3.10",
+    "sgl-kernel==0.3.11",
     "torch==2.8.0",
     "torchaudio==2.8.0",
     "torchvision",
@@ -75,7 +75,7 @@ srt = [
 blackwell = [
     "sglang[runtime_common]",
-    "sgl-kernel==0.3.10",
+    "sgl-kernel==0.3.11",
     "torch==2.8.0",
     "torchaudio==2.8.0",
     "torchvision",
...
@@ -703,7 +703,7 @@ def _set_envs_and_config(server_args: ServerArgs):
     if server_args.attention_backend == "flashinfer":
         assert_pkg_version(
             "flashinfer_python",
-            "0.3.1",
+            "0.4.0rc1",
             "Please uninstall the old version and "
             "reinstall the latest version by following the instructions "
             "at https://docs.flashinfer.ai/installation.html.",
@@ -711,7 +711,7 @@ def _set_envs_and_config(server_args: ServerArgs):
     if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"):
         assert_pkg_version(
             "sgl-kernel",
-            "0.3.10",
+            "0.3.11",
             "Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
         )
...
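For readers following the version bump: assert_pkg_version aborts server startup when the installed package is older than the pinned minimum, which is why the flashinfer_python and sgl-kernel floors move together with the dependency pins above. Below is a minimal, self-contained approximation of such a gate, built on importlib.metadata and the third-party packaging library; it is an illustrative sketch, not sglang's actual assert_pkg_version implementation.

```python
# Sketch only: a package version gate similar in spirit to the
# assert_pkg_version calls updated in this commit. Hypothetical helper,
# not sglang's actual implementation.
from importlib.metadata import PackageNotFoundError, version

from packaging.version import parse  # third-party `packaging` package


def check_pkg_version(pkg: str, min_version: str, hint: str) -> None:
    """Raise RuntimeError if `pkg` is missing or older than `min_version`."""
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        raise RuntimeError(f"{pkg} is not installed. {hint}") from None
    if parse(installed) < parse(min_version):
        raise RuntimeError(
            f"{pkg}=={installed} is older than the required {min_version}. {hint}"
        )


# Mirrors the two checks in _set_envs_and_config after this commit.
check_pkg_version(
    "flashinfer_python",
    "0.4.0rc1",
    "See https://docs.flashinfer.ai/installation.html.",
)
check_pkg_version(
    "sgl-kernel",
    "0.3.11",
    "Reinstall with `pip install sgl-kernel --force-reinstall`.",
)
```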
@@ -1432,6 +1432,9 @@ def fast_decode_plan(
             head_dim,
             head_dim,
             False,  # causal
+            window_left,
+            -1,
+            False,
         )
     except Exception as e:
         raise RuntimeError(f"Error in standard plan: {e}")
...
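The functional part of the fix is this last hunk: the planner call in flashinfer v0.4.0rc1 expects additional positional arguments after the causal flag, so sglang's fast_decode_plan override now forwards window_left plus the two trailing values shown in the diff (-1 and False, copied here without further interpretation). If a single call site had to work against both the old and the new flashinfer release, one option is a small fallback shim; the sketch below is a hedged illustration with hypothetical names (plan_backend, base_args), not code from this repository.

```python
# Hedged sketch: keep one call site compatible with both flashinfer
# argument layouts. `plan_backend` and `base_args` are hypothetical names;
# the real fix in this commit simply appends the three new arguments.
def plan_compat(plan_backend, base_args, window_left):
    """Try the v0.4.0rc1-style argument list first, then fall back."""
    try:
        # New layout (per this diff): ..., causal flag, then window_left
        # and two trailing values whose semantics are not documented here.
        return plan_backend(*base_args, window_left, -1, False)
    except TypeError:
        # Older flashinfer releases accept the shorter argument list.
        return plan_backend(*base_args)
```

Catching TypeError is a blunt instrument (it can also mask genuine errors raised inside the call); pinning flashinfer_python==0.4.0rc1, as this commit does, avoids needing such a shim at all.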