"examples/vscode:/vscode.git/clone" did not exist on "1bec35554fc2a573c8dc242e0fd829d2c4ed0274"
Unverified Commit 135dc82e, authored by Dmitry Tokarev, committed by GitHub
Browse files

chore: vllm 0.10.1.1 (#2641)

parent 3036e60b
...@@ -13,11 +13,11 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ...@@ -13,11 +13,11 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this # Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1 ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
ARG TORCH_BACKEND="cu128" ARG TORCH_BACKEND="cu128"
# Match 0.10.1 vLLM release # Match 0.10.1.1 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1 # https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100: # Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'" # "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
ARG DEEPGEMM_REF="f85ec64" ARG DEEPGEMM_REF="f85ec64"
......
...@@ -20,10 +20,10 @@ set -euo pipefail ...@@ -20,10 +20,10 @@ set -euo pipefail
# Parse arguments # Parse arguments
EDITABLE=true EDITABLE=true
VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1 VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
# When updating above VLLM_REF make sure precompiled wheel file URL is correct. Run this command: # When updating above VLLM_REF make sure precompiled wheel file URL is correct. Run this command:
# aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request # aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl" VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1.1-cp38-abi3-manylinux1_x86_64.whl"
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git" VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
MAX_JOBS=16 MAX_JOBS=16
INSTALLATION_DIR=/tmp INSTALLATION_DIR=/tmp
...@@ -86,13 +86,13 @@ while [[ $# -gt 0 ]]; do ...@@ -86,13 +86,13 @@ while [[ $# -gt 0 ]]; do
echo "Options:" echo "Options:"
echo " --editable Install vllm in editable mode (default)" echo " --editable Install vllm in editable mode (default)"
echo " --no-editable Install vllm in non-editable mode" echo " --no-editable Install vllm in non-editable mode"
echo f" --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})" echo " --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
echo f" --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})" echo " --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)" echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
echo f" --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR})" echo " --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR})"
echo f" --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF})" echo " --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
echo f" --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF})" echo " --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF})"
echo f" --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})" echo " --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
exit 0 exit 0
;; ;;
*) *)
......
...@@ -55,7 +55,7 @@ trtllm =[ ...@@ -55,7 +55,7 @@ trtllm =[
vllm = [ vllm = [
"uvloop", "uvloop",
"nixl<=0.4.1", "nixl<=0.4.1",
"vllm[flashinfer]==0.10.1", "vllm[flashinfer]==0.10.1.1",
] ]
sglang = [ sglang = [
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment