norm / vllm · Commits · 9cabcb76

Commit 9cabcb76 (unverified), parent 7b895c59
Authored Oct 31, 2023 by Stephen Krider; committed via GitHub on Oct 31, 2023

Add Dockerfile (#1350)
Changes: 5 files, +101 additions, -2 deletions

- Dockerfile: +72, -0
- docs/source/index.rst: +1, -0
- docs/source/serving/deploying_with_docker.rst: +21, -0
- requirements-dev.txt: +1, -0
- setup.py: +6, -2
Dockerfile (new file, mode 100644, view file @ 9cabcb76)
```dockerfile
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS dev

RUN apt-get update -y \
    && apt-get install -y python3-pip

WORKDIR /workspace

# install build and runtime dependencies
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

# install development dependencies
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-dev.txt

# image to build pytorch extensions
FROM dev AS build

# copy input files
COPY csrc csrc
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py

# max jobs used by Ninja to build extensions
# (the build arg must be declared in this stage, or --build-arg max_jobs=8
# from the build command is silently ignored)
ARG max_jobs
ENV MAX_JOBS=$max_jobs
RUN python3 setup.py build_ext --inplace

# image to run unit testing suite
FROM dev AS test

# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY tests tests
COPY vllm vllm

ENTRYPOINT ["python3", "-m", "pytest", "tests"]

# use CUDA base as CUDA runtime dependencies are already installed via pip
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS vllm-base

# libnccl required for ray
RUN apt-get update -y \
    && apt-get install -y python3-pip

WORKDIR /workspace
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

FROM vllm-base AS vllm
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm
EXPOSE 8000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]

# openai api server alternative
FROM vllm-base AS vllm-openai
# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate fschat

COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
```
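The Dockerfile above defines several stages (dev, build, test, vllm, vllm-openai), any of which can be built directly with `--target`. As a sketch, assuming BuildKit is enabled and an NVIDIA container runtime is available, the test stage could be built and run like this (image tags here are arbitrary examples):

```console
# build only the test stage and run the unit test suite in it
$ DOCKER_BUILDKIT=1 docker build . --target test --tag vllm-test --build-arg max_jobs=8
$ docker run --runtime nvidia --gpus all vllm-test

# or build the OpenAI-compatible server stage instead
$ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm-openai --build-arg max_jobs=8
```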
docs/source/index.rst (view file @ 9cabcb76)

```diff
@@ -65,6 +65,7 @@ Documentation
    serving/distributed_serving
    serving/run_on_sky
    serving/deploying_with_triton
+   serving/deploying_with_docker

 .. toctree::
    :maxdepth: 1
```
docs/source/serving/deploying_with_docker.rst (new file, mode 100644, view file @ 9cabcb76)

.. _deploying_with_docker:

Deploying with Docker
=====================

You can build and run vLLM from source via the provided Dockerfile. To build vLLM:

.. code-block:: console

    $ DOCKER_BUILDKIT=1 docker build . --target vllm --tag vllm --build-arg max_jobs=8

To run vLLM:

.. code-block:: console

    $ docker run --runtime nvidia --gpus all \
        -v ~/.cache/huggingface:/root/.cache/huggingface \
        -p 8000:8000 \
        --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
        vllm <args...>
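The `vllm` target started above serves `vllm.entrypoints.api_server` on port 8000. As a minimal client sketch: the `/generate` endpoint and the `prompt`/`max_tokens` fields match the api_server at the time of this commit, but verify them against your version; this snippet only constructs the request, and sending it requires the running container.

```python
import json
from urllib import request

API_URL = "http://localhost:8000/generate"  # default port exposed by the container

def build_generate_request(prompt: str, max_tokens: int = 16) -> request.Request:
    """Build an HTTP POST request for the api_server's /generate endpoint."""
    payload = json.dumps({"prompt": prompt, "max_tokens": max_tokens}).encode("utf-8")
    return request.Request(API_URL, data=payload,
                           headers={"Content-Type": "application/json"})

req = build_generate_request("Hello, my name is", max_tokens=8)
print(req.full_url)  # http://localhost:8000/generate
# to actually send it, with the container from above running:
# with request.urlopen(req) as resp:
#     print(json.load(resp))
```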
requirements-dev.txt (view file @ 9cabcb76)

```diff
@@ -12,3 +12,4 @@ types-setuptools
 pytest
 pytest-forked
+pytest-asyncio
```
setup.py (view file @ 9cabcb76)

```diff
@@ -239,8 +239,12 @@ def find_version(filepath: str):

 def read_readme() -> str:
-    """Read the README file."""
-    return io.open(get_path("README.md"), "r", encoding="utf-8").read()
+    """Read the README file if present."""
+    p = get_path("README.md")
+    if os.path.isfile(p):
+        return io.open(get_path("README.md"), "r", encoding="utf-8").read()
+    else:
+        return ""


 def get_requirements() -> List[str]:
```
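The setup.py change above guards the README read so that building from a tree without README.md (such as the Docker build context, which only copies selected files) no longer crashes. A standalone sketch of the same pattern; the function name `read_if_present` and the temp-directory demo are illustrative, not from the commit:

```python
import io
import os
import tempfile

def read_if_present(path: str) -> str:
    """Return the file's text if it exists, otherwise an empty string."""
    if os.path.isfile(path):
        return io.open(path, "r", encoding="utf-8").read()
    return ""

# demo against a throwaway directory
with tempfile.TemporaryDirectory() as d:
    readme = os.path.join(d, "README.md")
    with io.open(readme, "w", encoding="utf-8") as f:
        f.write("# vLLM")
    print(read_if_present(readme))                               # -> # vLLM
    print(repr(read_if_present(os.path.join(d, "missing.md"))))  # -> ''
```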