Unverified commit a33ea28b, authored by Michael Goin and committed by GitHub
Browse files

Add `flashinfer_python` to CUDA wheel requirements (#21389)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent 7b49cb1c
...@@ -386,6 +386,8 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist ...@@ -386,6 +386,8 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
# Install FlashInfer from source # Install FlashInfer from source
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
# Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt
# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel.
ARG FLASHINFER_GIT_REF="v0.2.9rc2" ARG FLASHINFER_GIT_REF="v0.2.9rc2"
RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH' RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
. /etc/environment . /etc/environment
...@@ -408,7 +410,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH' ...@@ -408,7 +410,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
python3 -m flashinfer.aot python3 -m flashinfer.aot
TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
uv pip install --system --no-build-isolation . uv pip install --system --no-build-isolation --force-reinstall --no-deps .
popd popd
rm -rf flashinfer rm -rf flashinfer
BASH BASH
......
...@@ -12,3 +12,5 @@ torchaudio==2.7.1 ...@@ -12,3 +12,5 @@ torchaudio==2.7.1
torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7 xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
# FlashInfer should be updated together with the Dockerfile
flashinfer_python==0.2.9rc2
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment