"vscode:/vscode.git/clone" did not exist on "e289998932f55dec932bf6b0363676fab99dde0d"
Commit 3fa3c22a (unverified), authored by Baizhou Zhang, committed by GitHub
Browse files

Fix fast decode plan for flashinfer v0.4.0rc1 and upgrade sgl-kernel 0.3.11 (#10634)


Co-authored-by: zhyncs <me@zhyncs.com>
parent 4f2055ad
......@@ -85,7 +85,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
&& python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
&& python3 -m flashinfer --download-cubin \
&& if [ "$CUDA_VERSION" = "12.6.1" ]; then \
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.10/sgl_kernel-0.3.10+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.11/sgl_kernel-0.3.11+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
fi
# Download source files
......
......@@ -57,12 +57,12 @@ dependencies = [
"uvicorn",
"uvloop",
"xgrammar==0.1.24",
"sgl-kernel==0.3.10",
"sgl-kernel==0.3.11",
"torch==2.8.0",
"torchaudio==2.8.0",
"torchvision",
"cuda-python",
"flashinfer_python==0.3.1",
"flashinfer_python==0.4.0rc1",
"openai==1.99.1",
"tiktoken",
"anthropic>=0.20.0",
......
......@@ -65,7 +65,7 @@ tracing = [
srt = [
"sglang[runtime_common]",
"sgl-kernel==0.3.10",
"sgl-kernel==0.3.11",
"torch==2.8.0",
"torchaudio==2.8.0",
"torchvision",
......@@ -75,7 +75,7 @@ srt = [
blackwell = [
"sglang[runtime_common]",
"sgl-kernel==0.3.10",
"sgl-kernel==0.3.11",
"torch==2.8.0",
"torchaudio==2.8.0",
"torchvision",
......
......@@ -703,7 +703,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if server_args.attention_backend == "flashinfer":
assert_pkg_version(
"flashinfer_python",
"0.3.1",
"0.4.0rc1",
"Please uninstall the old version and "
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
......@@ -711,7 +711,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"):
assert_pkg_version(
"sgl-kernel",
"0.3.10",
"0.3.11",
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
)
......
......@@ -1432,6 +1432,9 @@ def fast_decode_plan(
head_dim,
head_dim,
False, # causal
window_left,
-1,
False,
)
except Exception as e:
raise RuntimeError(f"Error in standard plan: {e}")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment