Unverified commit 135dc82e, authored by Dmitry Tokarev and committed by GitHub
Browse files

chore: vllm 0.10.1.1 (#2641)

parent 3036e60b
......@@ -13,11 +13,11 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
ARG TORCH_BACKEND="cu128"
# Match 0.10.1 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1
# Match 0.10.1.1 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
# DeepGEMM is pinned to the commit before https://github.com/deepseek-ai/DeepGEMM/pull/112, because that change seems to break on H100:
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
ARG DEEPGEMM_REF="f85ec64"
......
......@@ -20,10 +20,10 @@ set -euo pipefail
# Parse arguments
EDITABLE=true
VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
# When updating VLLM_REF above, make sure the precompiled wheel file URL below is still correct. To check, run this command:
# aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1.1-cp38-abi3-manylinux1_x86_64.whl"
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
MAX_JOBS=16
INSTALLATION_DIR=/tmp
......@@ -86,13 +86,13 @@ while [[ $# -gt 0 ]]; do
echo "Options:"
echo " --editable Install vllm in editable mode (default)"
echo " --no-editable Install vllm in non-editable mode"
echo f" --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
echo f" --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
echo " --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
echo " --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
echo f" --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR})"
echo f" --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
echo f" --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF})"
echo f" --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
echo " --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR})"
echo " --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
echo " --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF})"
echo " --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
exit 0
;;
*)
......
......@@ -55,7 +55,7 @@ trtllm =[
vllm = [
"uvloop",
"nixl<=0.4.1",
"vllm[flashinfer]==0.10.1",
"vllm[flashinfer]==0.10.1.1",
]
sglang = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment