Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
497bc831
Unverified
Commit
497bc831
authored
Feb 20, 2025
by
Michael Goin
Committed by
GitHub
Feb 19, 2025
Browse files
[CI/Build] Use uv in the Dockerfile (#13566)
parent
3738e6fa
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
13 deletions
+19
-13
Dockerfile
Dockerfile
+19
-13
No files found.
Dockerfile
View file @
497bc831
...
@@ -27,6 +27,9 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
...
@@ -27,6 +27,9 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&&
ln
-sf
/usr/bin/python
${
PYTHON_VERSION
}
-config
/usr/bin/python3-config
\
&&
ln
-sf
/usr/bin/python
${
PYTHON_VERSION
}
-config
/usr/bin/python3-config
\
&&
curl
-sS
https://bootstrap.pypa.io/get-pip.py | python
${
PYTHON_VERSION
}
\
&&
curl
-sS
https://bootstrap.pypa.io/get-pip.py | python
${
PYTHON_VERSION
}
\
&&
python3
--version
&&
python3
-m
pip
--version
&&
python3
--version
&&
python3
-m
pip
--version
# Install uv for faster pip installs
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
uv
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
# as it was causing spam when compiling the CUTLASS kernels
# as it was causing spam when compiling the CUTLASS kernels
...
@@ -52,13 +55,13 @@ WORKDIR /workspace
...
@@ -52,13 +55,13 @@ WORKDIR /workspace
# after this step
# after this step
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
if
[
"
$TARGETPLATFORM
"
=
"linux/arm64"
]
;
then
\
if
[
"
$TARGETPLATFORM
"
=
"linux/arm64"
]
;
then
\
python3
-m
pip
install
--index-url
https://download.pytorch.org/whl/nightly/cu126
"torch==2.7.0.dev20250121+cu126"
"torchvision==0.22.0.dev20250121"
;
\
uv
pip
install
--system
--index-url
https://download.pytorch.org/whl/nightly/cu126
"torch==2.7.0.dev20250121+cu126"
"torchvision==0.22.0.dev20250121"
;
\
fi
fi
COPY
requirements-common.txt requirements-common.txt
COPY
requirements-common.txt requirements-common.txt
COPY
requirements-cuda.txt requirements-cuda.txt
COPY
requirements-cuda.txt requirements-cuda.txt
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-r
requirements-cuda.txt
uv
pip
install
--system
-r
requirements-cuda.txt
# cuda arch list used by torch
# cuda arch list used by torch
# can be useful for both `dev` and `test`
# can be useful for both `dev` and `test`
...
@@ -79,7 +82,7 @@ ARG TARGETPLATFORM
...
@@ -79,7 +82,7 @@ ARG TARGETPLATFORM
COPY
requirements-build.txt requirements-build.txt
COPY
requirements-build.txt requirements-build.txt
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-r
requirements-build.txt
uv
pip
install
--system
-r
requirements-build.txt
COPY
. .
COPY
. .
ARG
GIT_REPO_CHECK=0
ARG
GIT_REPO_CHECK=0
...
@@ -144,7 +147,7 @@ COPY requirements-lint.txt requirements-lint.txt
...
@@ -144,7 +147,7 @@ COPY requirements-lint.txt requirements-lint.txt
COPY
requirements-test.txt requirements-test.txt
COPY
requirements-test.txt requirements-test.txt
COPY
requirements-dev.txt requirements-dev.txt
COPY
requirements-dev.txt requirements-dev.txt
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-r
requirements-dev.txt
uv
pip
install
--system
-r
requirements-dev.txt
#################### DEV IMAGE ####################
#################### DEV IMAGE ####################
#################### vLLM installation IMAGE ####################
#################### vLLM installation IMAGE ####################
...
@@ -174,6 +177,9 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
...
@@ -174,6 +177,9 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&&
ln
-sf
/usr/bin/python
${
PYTHON_VERSION
}
-config
/usr/bin/python3-config
\
&&
ln
-sf
/usr/bin/python
${
PYTHON_VERSION
}
-config
/usr/bin/python3-config
\
&&
curl
-sS
https://bootstrap.pypa.io/get-pip.py | python
${
PYTHON_VERSION
}
\
&&
curl
-sS
https://bootstrap.pypa.io/get-pip.py | python
${
PYTHON_VERSION
}
\
&&
python3
--version
&&
python3
-m
pip
--version
&&
python3
--version
&&
python3
-m
pip
--version
# Install uv for faster pip installs
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
uv
# Workaround for https://github.com/openai/triton/issues/2507 and
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
...
@@ -187,13 +193,13 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
...
@@ -187,13 +193,13 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# after this step
# after this step
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
if
[
"
$TARGETPLATFORM
"
=
"linux/arm64"
]
;
then
\
if
[
"
$TARGETPLATFORM
"
=
"linux/arm64"
]
;
then
\
python3
-m
pip
install
--index-url
https://download.pytorch.org/whl/nightly/cu124
"torch==2.6.0.dev20241210+cu124"
"torchvision==0.22.0.dev20241215"
;
\
uv
pip
install
--system
--index-url
https://download.pytorch.org/whl/nightly/cu124
"torch==2.6.0.dev20241210+cu124"
"torchvision==0.22.0.dev20241215"
;
\
fi
fi
# Install vllm wheel first, so that torch etc will be installed.
# Install vllm wheel first, so that torch etc will be installed.
RUN
--mount
=
type
=
bind
,from
=
build,src
=
/workspace/dist,target
=
/vllm-workspace/dist
\
RUN
--mount
=
type
=
bind
,from
=
build,src
=
/workspace/dist,target
=
/vllm-workspace/dist
\
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
dist/
*
.whl
--verbose
uv
pip
install
--system
dist/
*
.whl
--verbose
# If we need to build FlashInfer wheel before its release:
# If we need to build FlashInfer wheel before its release:
# $ export FLASHINFER_ENABLE_AOT=1
# $ export FLASHINFER_ENABLE_AOT=1
...
@@ -210,7 +216,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
...
@@ -210,7 +216,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
.
/etc/environment
&&
\
.
/etc/environment
&&
\
if
[
"
$TARGETPLATFORM
"
!=
"linux/arm64"
]
;
then
\
if
[
"
$TARGETPLATFORM
"
!=
"linux/arm64"
]
;
then
\
python3
-m
pip
install
https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl
;
\
uv
pip
install
--system
https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl
;
\
fi
fi
COPY
examples examples
COPY
examples examples
...
@@ -220,7 +226,7 @@ COPY examples examples
...
@@ -220,7 +226,7 @@ COPY examples examples
# TODO: Remove this once FlashInfer AOT wheel is fixed
# TODO: Remove this once FlashInfer AOT wheel is fixed
COPY
requirements-build.txt requirements-build.txt
COPY
requirements-build.txt requirements-build.txt
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-r
requirements-build.txt
uv
pip
install
--system
-r
requirements-build.txt
#################### vLLM installation IMAGE ####################
#################### vLLM installation IMAGE ####################
...
@@ -233,15 +239,15 @@ ADD . /vllm-workspace/
...
@@ -233,15 +239,15 @@ ADD . /vllm-workspace/
# install development dependencies (for testing)
# install development dependencies (for testing)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-r
requirements-dev.txt
uv
pip
install
--system
-r
requirements-dev.txt
# install development dependencies (for testing)
# install development dependencies (for testing)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
-e
tests/vllm_test_utils
uv
pip
install
--system
-e
tests/vllm_test_utils
# enable fast downloads from hf (for testing)
# enable fast downloads from hf (for testing)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3
-m
pip
install
hf_transfer
uv
pip
install
--system
hf_transfer
ENV
HF_HUB_ENABLE_HF_TRANSFER 1
ENV
HF_HUB_ENABLE_HF_TRANSFER 1
# Copy in the v1 package for testing (it isn't distributed yet)
# Copy in the v1 package for testing (it isn't distributed yet)
...
@@ -262,9 +268,9 @@ FROM vllm-base AS vllm-openai-base
...
@@ -262,9 +268,9 @@ FROM vllm-base AS vllm-openai-base
# install additional dependencies for openai api server
# install additional dependencies for openai api server
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
if
[
"
$TARGETPLATFORM
"
=
"linux/arm64"
]
;
then
\
if
[
"
$TARGETPLATFORM
"
=
"linux/arm64"
]
;
then
\
pip
install
accelerate hf_transfer
'modelscope!=1.15.0'
'bitsandbytes>=0.42.0'
'timm==0.9.10'
boto3 runai-model-streamer runai-model-streamer[s3]
;
\
uv
pip
install
--system
accelerate hf_transfer
'modelscope!=1.15.0'
'bitsandbytes>=0.42.0'
'timm==0.9.10'
boto3 runai-model-streamer runai-model-streamer[s3]
;
\
else
\
else
\
pip
install
accelerate hf_transfer
'modelscope!=1.15.0'
'bitsandbytes>=0.45.0'
'timm==0.9.10'
boto3 runai-model-streamer runai-model-streamer[s3]
;
\
uv
pip
install
--system
accelerate hf_transfer
'modelscope!=1.15.0'
'bitsandbytes>=0.45.0'
'timm==0.9.10'
boto3 runai-model-streamer runai-model-streamer[s3]
;
\
fi
fi
ENV
VLLM_USAGE_SOURCE production-docker-image
ENV
VLLM_USAGE_SOURCE production-docker-image
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment