Commit 675ba75f authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.8.3' into v0.8.3-ori

parents 5cc98918 296c6572
...@@ -12,7 +12,8 @@ ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}} ...@@ -12,7 +12,8 @@ ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
# Install some basic utilities # Install some basic utilities
RUN apt-get update -q -y && apt-get install -q -y \ RUN apt-get update -q -y && apt-get install -q -y \
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
apt-transport-https ca-certificates wget curl
# Remove sccache # Remove sccache
RUN python3 -m pip install --upgrade pip && pip install setuptools_scm RUN python3 -m pip install --upgrade pip && pip install setuptools_scm
RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)" RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"
......
ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:6.3.1-complete ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:6.3.1-complete
ARG HIPBLASLT_BRANCH="4d40e36" ARG HIPBLASLT_BRANCH="db8e93b4"
ARG HIPBLAS_COMMON_BRANCH="7c1566b" ARG HIPBLAS_COMMON_BRANCH="7c1566b"
ARG LEGACY_HIPBLASLT_OPTION= ARG LEGACY_HIPBLASLT_OPTION=
ARG RCCL_BRANCH="648a58d" ARG RCCL_BRANCH="648a58d"
ARG RCCL_REPO="https://github.com/ROCm/rccl" ARG RCCL_REPO="https://github.com/ROCm/rccl"
ARG TRITON_BRANCH="e5be006" ARG TRITON_BRANCH="e5be006"
ARG TRITON_REPO="https://github.com/triton-lang/triton.git" ARG TRITON_REPO="https://github.com/triton-lang/triton.git"
ARG PYTORCH_BRANCH="3a585126" ARG PYTORCH_BRANCH="295f2ed4"
ARG PYTORCH_VISION_BRANCH="v0.19.1" ARG PYTORCH_VISION_BRANCH="v0.21.0"
ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git" ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
ARG FA_BRANCH="b7d29fb" ARG FA_BRANCH="1a7f4dfa"
ARG FA_REPO="https://github.com/ROCm/flash-attention.git" ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
ARG AITER_BRANCH="21d47a9" ARG AITER_BRANCH="8970b25b"
ARG AITER_REPO="https://github.com/ROCm/aiter.git" ARG AITER_REPO="https://github.com/ROCm/aiter.git"
FROM ${BASE_IMAGE} AS base FROM ${BASE_IMAGE} AS base
...@@ -20,7 +20,7 @@ FROM ${BASE_IMAGE} AS base ...@@ -20,7 +20,7 @@ FROM ${BASE_IMAGE} AS base
ENV PATH=/opt/rocm/llvm/bin:$PATH ENV PATH=/opt/rocm/llvm/bin:$PATH
ENV ROCM_PATH=/opt/rocm ENV ROCM_PATH=/opt/rocm
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib: ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib:
ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942 ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.12
...@@ -31,7 +31,7 @@ ENV DEBIAN_FRONTEND=noninteractive ...@@ -31,7 +31,7 @@ ENV DEBIAN_FRONTEND=noninteractive
# Install Python and other dependencies # Install Python and other dependencies
RUN apt-get update -y \ RUN apt-get update -y \
&& apt-get install -y software-properties-common git curl sudo vim less \ && apt-get install -y software-properties-common git curl sudo vim less libgfortran5 \
&& add-apt-repository ppa:deadsnakes/ppa \ && add-apt-repository ppa:deadsnakes/ppa \
&& apt-get update -y \ && apt-get update -y \
&& apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
...@@ -42,7 +42,7 @@ RUN apt-get update -y \ ...@@ -42,7 +42,7 @@ RUN apt-get update -y \
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
&& python3 --version && python3 -m pip --version && python3 --version && python3 -m pip --version
RUN pip install -U packaging cmake ninja wheel setuptools pybind11 Cython RUN pip install -U packaging 'cmake<4' ninja wheel setuptools pybind11 Cython
FROM base AS build_hipblaslt FROM base AS build_hipblaslt
ARG HIPBLASLT_BRANCH ARG HIPBLASLT_BRANCH
...@@ -60,7 +60,8 @@ RUN cd hipBLAS-common \ ...@@ -60,7 +60,8 @@ RUN cd hipBLAS-common \
RUN git clone https://github.com/ROCm/hipBLASLt RUN git clone https://github.com/ROCm/hipBLASLt
RUN cd hipBLASLt \ RUN cd hipBLASLt \
&& git checkout ${HIPBLASLT_BRANCH} \ && git checkout ${HIPBLASLT_BRANCH} \
&& ./install.sh -d --architecture ${PYTORCH_ROCM_ARCH} ${LEGACY_HIPBLASLT_OPTION} \ && apt-get install -y llvm-dev \
&& ./install.sh -dc --architecture ${PYTORCH_ROCM_ARCH} ${LEGACY_HIPBLASLT_OPTION} \
&& cd build/release \ && cd build/release \
&& make package && make package
RUN mkdir -p /app/install && cp /app/hipBLASLt/build/release/*.deb /app/hipBLAS-common/build/*.deb /app/install RUN mkdir -p /app/install && cp /app/hipBLASLt/build/release/*.deb /app/hipBLAS-common/build/*.deb /app/install
...@@ -110,11 +111,24 @@ RUN git clone ${FA_REPO} ...@@ -110,11 +111,24 @@ RUN git clone ${FA_REPO}
RUN cd flash-attention \ RUN cd flash-attention \
&& git checkout ${FA_BRANCH} \ && git checkout ${FA_BRANCH} \
&& git submodule update --init \ && git submodule update --init \
&& MAX_JOBS=64 GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist
RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \ RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
&& cp /app/vision/dist/*.whl /app/install \ && cp /app/vision/dist/*.whl /app/install \
&& cp /app/flash-attention/dist/*.whl /app/install && cp /app/flash-attention/dist/*.whl /app/install
FROM base AS build_aiter
ARG AITER_BRANCH
ARG AITER_REPO
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
pip install /install/*.whl
RUN git clone --recursive ${AITER_REPO}
RUN cd aiter \
&& git checkout ${AITER_BRANCH} \
&& git submodule update --init --recursive \
&& pip install -r requirements.txt
RUN pip install pyyaml && cd aiter && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py bdist_wheel --dist-dir=dist && ls /app/aiter/dist/*.whl
RUN mkdir -p /app/install && cp /app/aiter/dist/*.whl /app/install
FROM base AS final FROM base AS final
RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \ RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \
dpkg -i /install/*deb \ dpkg -i /install/*deb \
...@@ -130,19 +144,12 @@ RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ ...@@ -130,19 +144,12 @@ RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \
pip install /install/*.whl pip install /install/*.whl
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \ RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
pip install /install/*.whl pip install /install/*.whl
RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \
ARG AITER_REPO pip install /install/*.whl
ARG AITER_BRANCH
RUN git clone --recursive ${AITER_REPO}
RUN cd aiter \
&& git checkout ${AITER_BRANCH} \
&& git submodule update --init --recursive \
&& pip install -r requirements.txt \
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop && pip show aiter
ARG BASE_IMAGE ARG BASE_IMAGE
ARG HIPBLASLT_BRANCH
ARG HIPBLAS_COMMON_BRANCH ARG HIPBLAS_COMMON_BRANCH
ARG HIPBLASLT_BRANCH
ARG LEGACY_HIPBLASLT_OPTION ARG LEGACY_HIPBLASLT_OPTION
ARG RCCL_BRANCH ARG RCCL_BRANCH
ARG RCCL_REPO ARG RCCL_REPO
...@@ -154,6 +161,8 @@ ARG PYTORCH_REPO ...@@ -154,6 +161,8 @@ ARG PYTORCH_REPO
ARG PYTORCH_VISION_REPO ARG PYTORCH_VISION_REPO
ARG FA_BRANCH ARG FA_BRANCH
ARG FA_REPO ARG FA_REPO
ARG AITER_BRANCH
ARG AITER_REPO
RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \ RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \
&& echo "HIPBLAS_COMMON_BRANCH: ${HIPBLAS_COMMON_BRANCH}" >> /app/versions.txt \ && echo "HIPBLAS_COMMON_BRANCH: ${HIPBLAS_COMMON_BRANCH}" >> /app/versions.txt \
&& echo "HIPBLASLT_BRANCH: ${HIPBLASLT_BRANCH}" >> /app/versions.txt \ && echo "HIPBLASLT_BRANCH: ${HIPBLASLT_BRANCH}" >> /app/versions.txt \
...@@ -167,6 +176,5 @@ RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \ ...@@ -167,6 +176,5 @@ RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \
&& echo "PYTORCH_REPO: ${PYTORCH_REPO}" >> /app/versions.txt \ && echo "PYTORCH_REPO: ${PYTORCH_REPO}" >> /app/versions.txt \
&& echo "PYTORCH_VISION_REPO: ${PYTORCH_VISION_REPO}" >> /app/versions.txt \ && echo "PYTORCH_VISION_REPO: ${PYTORCH_VISION_REPO}" >> /app/versions.txt \
&& echo "FA_BRANCH: ${FA_BRANCH}" >> /app/versions.txt \ && echo "FA_BRANCH: ${FA_BRANCH}" >> /app/versions.txt \
&& echo "FA_REPO: ${FA_REPO}" >> /app/versions.txt \
&& echo "AITER_BRANCH: ${AITER_BRANCH}" >> /app/versions.txt \ && echo "AITER_BRANCH: ${AITER_BRANCH}" >> /app/versions.txt \
&& echo "AITER_REPO: ${AITER_REPO}" >> /app/versions.txt && echo "AITER_REPO: ${AITER_REPO}" >> /app/versions.txt
...@@ -2,19 +2,42 @@ ...@@ -2,19 +2,42 @@
## Build the docs ## Build the docs
- Make sure in `docs` directory
```bash
cd docs
```
- Install the dependencies:
```bash ```bash
# Install dependencies.
pip install -r ../requirements/docs.txt pip install -r ../requirements/docs.txt
```
- Clean the previous build (optional but recommended):
# Build the docs. ```bash
make clean make clean
```
- Generate the HTML documentation:
```bash
make html make html
``` ```
## Open the docs with your browser ## Open the docs with your browser
- Serve the documentation locally:
```bash ```bash
python -m http.server -d build/html/ python -m http.server -d build/html/
``` ```
Launch your browser and open localhost:8000. This will start a local server at http://localhost:8000. You can now open your browser and view the documentation.
If port 8000 is already in use, you can specify a different port, for example:
```bash
python -m http.server 3000 -d build/html/
```
...@@ -10,8 +10,8 @@ document.addEventListener("DOMContentLoaded", function () { ...@@ -10,8 +10,8 @@ document.addEventListener("DOMContentLoaded", function () {
script.setAttribute("runllm-keyboard-shortcut", "Mod+j"); // cmd-j or ctrl-j to open the widget. script.setAttribute("runllm-keyboard-shortcut", "Mod+j"); // cmd-j or ctrl-j to open the widget.
script.setAttribute("runllm-name", "vLLM"); script.setAttribute("runllm-name", "vLLM");
script.setAttribute("runllm-position", "BOTTOM_RIGHT"); script.setAttribute("runllm-position", "BOTTOM_RIGHT");
script.setAttribute("runllm-position-y", "20%"); script.setAttribute("runllm-position-y", "120px");
script.setAttribute("runllm-position-x", "3%"); script.setAttribute("runllm-position-x", "20px");
script.setAttribute("runllm-assistant-id", "207"); script.setAttribute("runllm-assistant-id", "207");
script.async = true; script.async = true;
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
We host regular meetups in San Francisco Bay Area every 2 months. We will share the project updates from the vLLM team and have guest speakers from the industry to share their experience and insights. Please find the materials of our previous meetups below: We host regular meetups in San Francisco Bay Area every 2 months. We will share the project updates from the vLLM team and have guest speakers from the industry to share their experience and insights. Please find the materials of our previous meetups below:
- [vLLM x Ollama Inference Night](https://lu.ma/vllm-ollama), March 27th 2025. [[Slides]](https://docs.google.com/presentation/d/16T2PDD1YwRnZ4Tu8Q5r6n53c5Lr5c73UV9Vd2_eBo4U/edit?usp=sharing).
- [The first vLLM China Meetup](https://mp.weixin.qq.com/s/n77GibL2corAtQHtVEAzfg), March 16th 2025. [[Slides]](https://docs.google.com/presentation/d/1REHvfQMKGnvz6p3Fd23HhSO4c8j5WPGZV0bKYLwnHyQ/edit?usp=sharing).
- [The East Coast vLLM Meetup](https://lu.ma/7mu4k4xx), March 11th 2025. [[Slides]](https://docs.google.com/presentation/d/1NHiv8EUFF1NLd3fEYODm56nDmL26lEeXCaDgyDlTsRs/edit#slide=id.g31441846c39_0_0) - [The East Coast vLLM Meetup](https://lu.ma/7mu4k4xx), March 11th 2025. [[Slides]](https://docs.google.com/presentation/d/1NHiv8EUFF1NLd3fEYODm56nDmL26lEeXCaDgyDlTsRs/edit#slide=id.g31441846c39_0_0)
- [The ninth vLLM meetup](https://lu.ma/h7g3kuj9), with Meta, February 27th 2025. [[Slides]](https://docs.google.com/presentation/d/1jzC_PZVXrVNSFVCW-V4cFXb6pn7zZ2CyP_Flwo05aqg/edit?usp=sharing) - [The ninth vLLM meetup](https://lu.ma/h7g3kuj9), with Meta, February 27th 2025. [[Slides]](https://docs.google.com/presentation/d/1jzC_PZVXrVNSFVCW-V4cFXb6pn7zZ2CyP_Flwo05aqg/edit?usp=sharing)
- [The eighth vLLM meetup](https://lu.ma/zep56hui), with Google Cloud, January 22nd 2025. [[Slides]](https://docs.google.com/presentation/d/1epVkt4Zu8Jz_S5OhEHPc798emsYh2BwYfRuDDVEF7u4/edit?usp=sharing) - [The eighth vLLM meetup](https://lu.ma/zep56hui), with Google Cloud, January 22nd 2025. [[Slides]](https://docs.google.com/presentation/d/1epVkt4Zu8Jz_S5OhEHPc798emsYh2BwYfRuDDVEF7u4/edit?usp=sharing)
......
...@@ -22,6 +22,7 @@ Compute Resources: ...@@ -22,6 +22,7 @@ Compute Resources:
- Databricks - Databricks
- DeepInfra - DeepInfra
- Google Cloud - Google Cloud
- Intel
- Lambda Lab - Lambda Lab
- Nebius - Nebius
- Novita AI - Novita AI
......
...@@ -103,6 +103,11 @@ myst_url_schemes = { ...@@ -103,6 +103,11 @@ myst_url_schemes = {
"title": "Pull Request #{{path}}", "title": "Pull Request #{{path}}",
"classes": ["github"], "classes": ["github"],
}, },
"gh-project": {
"url": "https://github.com/orgs/vllm-project/projects/{{path}}",
"title": "Project #{{path}}",
"classes": ["github"],
},
"gh-dir": { "gh-dir": {
"url": "https://github.com/vllm-project/vllm/tree/main/{{path}}", "url": "https://github.com/vllm-project/vllm/tree/main/{{path}}",
"title": "{{path}}", "title": "{{path}}",
......
# Dockerfile # Dockerfile
We provide a <gh-file:Dockerfile> to construct the image for running an OpenAI compatible server with vLLM. We provide a <gh-file:docker/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
More information about deploying with Docker can be found [here](#deployment-docker). More information about deploying with Docker can be found [here](#deployment-docker).
Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes: Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes:
...@@ -28,7 +28,7 @@ The edges of the build graph represent: ...@@ -28,7 +28,7 @@ The edges of the build graph represent:
> Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present): > Commands to regenerate the build graph (make sure to run it **from the \`root\` directory of the vLLM repository** where the dockerfile is present):
> >
> ```bash > ```bash
> dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename Dockerfile > dockerfilegraph -o png --legend --dpi 200 --max-label-length 50 --filename docker/Dockerfile
> ``` > ```
> >
> or in case you want to run it directly with the docker image: > or in case you want to run it directly with the docker image:
...@@ -43,7 +43,7 @@ The edges of the build graph represent: ...@@ -43,7 +43,7 @@ The edges of the build graph represent:
> --output png \ > --output png \
> --dpi 200 \ > --dpi 200 \
> --max-label-length 50 \ > --max-label-length 50 \
> --filename Dockerfile \ > --filename docker/Dockerfile \
> --legend > --legend
> ``` > ```
> >
......
...@@ -11,6 +11,15 @@ We also believe in the power of community support; thus, answering queries, offe ...@@ -11,6 +11,15 @@ We also believe in the power of community support; thus, answering queries, offe
Finally, one of the most impactful ways to support us is by raising awareness about vLLM. Talk about it in your blog posts and highlight how it's driving your incredible projects. Express your support on social media if you're using vLLM, or simply offer your appreciation by starring our repository! Finally, one of the most impactful ways to support us is by raising awareness about vLLM. Talk about it in your blog posts and highlight how it's driving your incredible projects. Express your support on social media if you're using vLLM, or simply offer your appreciation by starring our repository!
## Job Board
Unsure on where to start? Check out the following links for tasks to work on:
- [Good first issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22)
- [Selected onboarding tasks](gh-project:6)
- [New model requests](https://github.com/vllm-project/vllm/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22new%20model%22)
- [Models with multi-modal capabilities](gh-project:10)
## License ## License
See <gh-file:LICENSE>. See <gh-file:LICENSE>.
...@@ -35,6 +44,12 @@ pre-commit run --all-files ...@@ -35,6 +44,12 @@ pre-commit run --all-files
pytest tests/ pytest tests/
``` ```
:::{tip}
Since the <gh-file:docker/Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.
Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
:::
:::{note} :::{note}
Currently, the repository is not fully checked by `mypy`. Currently, the repository is not fully checked by `mypy`.
::: :::
......
...@@ -34,11 +34,11 @@ If you need to use those dependencies (having accepted the license terms), ...@@ -34,11 +34,11 @@ If you need to use those dependencies (having accepted the license terms),
create a custom Dockerfile on top of the base image with an extra layer that installs them: create a custom Dockerfile on top of the base image with an extra layer that installs them:
```Dockerfile ```Dockerfile
FROM vllm/vllm-openai:v0.8.0 FROM vllm/vllm-openai:v0.8.2
# e.g. install the `audio` and `video` optional dependencies # e.g. install the `audio` and `video` optional dependencies
# NOTE: Make sure the version of vLLM matches the base image! # NOTE: Make sure the version of vLLM matches the base image!
RUN uv pip install vllm[audio,video]==0.8.0 RUN uv pip install --system vllm[audio,video]==0.8.2
``` ```
::: :::
...@@ -52,7 +52,7 @@ with an extra layer that installs their code from source: ...@@ -52,7 +52,7 @@ with an extra layer that installs their code from source:
```Dockerfile ```Dockerfile
FROM vllm/vllm-openai:latest FROM vllm/vllm-openai:latest
RUN uv pip install git+https://github.com/huggingface/transformers.git RUN uv pip install --system git+https://github.com/huggingface/transformers.git
``` ```
::: :::
...@@ -61,11 +61,11 @@ RUN uv pip install git+https://github.com/huggingface/transformers.git ...@@ -61,11 +61,11 @@ RUN uv pip install git+https://github.com/huggingface/transformers.git
## Building vLLM's Docker Image from Source ## Building vLLM's Docker Image from Source
You can build and run vLLM from source via the provided <gh-file:Dockerfile>. To build vLLM: You can build and run vLLM from source via the provided <gh-file:docker/Dockerfile>. To build vLLM:
```console ```console
# optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2 # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2
DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --file docker/Dockerfile
``` ```
:::{note} :::{note}
...@@ -92,6 +92,7 @@ Keep an eye on memory usage with parallel jobs as it can be substantial (see exa ...@@ -92,6 +92,7 @@ Keep an eye on memory usage with parallel jobs as it can be substantial (see exa
# Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB) # Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB)
$ python3 use_existing_torch.py $ python3 use_existing_torch.py
$ DOCKER_BUILDKIT=1 docker build . \ $ DOCKER_BUILDKIT=1 docker build . \
--file docker/Dockerfile \
--target vllm-openai \ --target vllm-openai \
--platform "linux/arm64" \ --platform "linux/arm64" \
-t vllm/vllm-gh200-openai:latest \ -t vllm/vllm-gh200-openai:latest \
......
...@@ -69,14 +69,14 @@ server { ...@@ -69,14 +69,14 @@ server {
```console ```console
cd $vllm_root cd $vllm_root
docker build -f Dockerfile . --tag vllm docker build -f docker/Dockerfile . --tag vllm
``` ```
If you are behind proxy, you can pass the proxy settings to the docker build command as shown below: If you are behind proxy, you can pass the proxy settings to the docker build command as shown below:
```console ```console
cd $vllm_root cd $vllm_root
docker build -f Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy docker build -f docker/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
``` ```
(nginxloadbalancer-nginx-docker-network)= (nginxloadbalancer-nginx-docker-network)=
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment