Unverified commit 1945f599, authored by Kris Hung and committed by GitHub
Browse files

build: Bump vllm and deepgemm version (#2509)

parent bec1dd54
...@@ -13,14 +13,14 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ...@@ -13,14 +13,14 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this # Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="ba81acbdc1eec643ba815a76628ae3e4b2263b76" ARG VLLM_REF="77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
ARG TORCH_BACKEND="cu128" ARG TORCH_BACKEND="cu128"
# Match 0.10.0 vLLM release # Match 0.10.0 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.0 # https://github.com/vllm-project/vllm/releases/tag/v0.10.0
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100: # Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'" # "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
ARG DEEPGEMM_REF="03d0be3" ARG DEEPGEMM_REF="f85ec64"
ARG FLASHINF_REF="v0.2.8rc1" ARG FLASHINF_REF="v0.2.8rc1"
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
......
...@@ -20,12 +20,12 @@ set -euo pipefail ...@@ -20,12 +20,12 @@ set -euo pipefail
# Parse arguments # Parse arguments
EDITABLE=true EDITABLE=true
VLLM_REF="ba81acbdc1eec643ba815a76628ae3e4b2263b76" VLLM_REF="77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git" VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
MAX_JOBS=16 MAX_JOBS=16
INSTALLATION_DIR=/tmp INSTALLATION_DIR=/tmp
ARCH=$(uname -m) ARCH=$(uname -m)
DEEPGEMM_REF="03d0be3" DEEPGEMM_REF="f85ec64"
FLASHINF_REF="v0.2.8rc1" FLASHINF_REF="v0.2.8rc1"
TORCH_BACKEND="cu128" TORCH_BACKEND="cu128"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment