"magic_pdf/vscode:/vscode.git/clone" did not exist on "e4504cb360814261b4881bf232b15269e28dd019"
Dockerfile 2.93 KB
Newer Older
1
# base development stage on the CUDA 12.1.0 `devel` image (Ubuntu 22.04);
# the build and test stages below derive from it
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev
Stephen Krider's avatar
Stephen Krider committed
2
3
4
5
6
7
8
9
10
11

# refresh the apt package index and install pip in a single layer
RUN apt-get update -y \
    && apt-get install -y python3-pip

# relative paths in the instructions that follow resolve against /workspace
WORKDIR /workspace

# install build and runtime dependencies
# (pip's download cache is kept in a BuildKit cache mount, so it is reused
# across builds and is not stored in the image layer)
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt
12

Stephen Krider's avatar
Stephen Krider committed
13
14
15
16
17
18
19
20
# install development dependencies
# (only the requirements file is copied first, so this layer is rebuilt only
# when requirements-dev.txt itself changes)
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-dev.txt

# image to build pytorch extensions; it starts from the dev stage, so the
# packages and working directory set up there are inherited
FROM dev AS build

21
22
23
24
25
# install build dependencies
# (needed only in this stage; pip's download cache stays in the BuildKit
# cache mount rather than in the image layer)
COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-build.txt

Stephen Krider's avatar
Stephen Krider committed
26
27
28
29
30
31
32
# copy input files for the extension build: the csrc directory, the packaging
# files, and only vllm/__init__.py from the Python package
# NOTE(review): requirements.txt was already copied in the dev stage that this
# stage derives from, so that COPY looks redundant — confirm before removing
COPY csrc csrc
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py

33
34
# CUDA architectures to compile for; override with
# --build-arg torch_cuda_arch_list="...". The value is exported as
# TORCH_CUDA_ARCH_LIST so the build steps below can read it.
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
Stephen Krider's avatar
Stephen Krider committed
35
# max jobs used by Ninja to build extensions
# (override with --build-arg max_jobs=N; exported as MAX_JOBS for the build)
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
38
39
40
# thread count handed to nvcc, exported as NVCC_THREADS
# (override with --build-arg nvcc_threads=N)
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}
41

Stephen Krider's avatar
Stephen Krider committed
42
43
# compile the extensions in place; later stages copy the resulting
# /workspace/vllm/*.so files out of this stage
RUN python3 setup.py build_ext --inplace

44
45
46
47
48
49
50
51
# Build the megablocks library as a wheel because it doesn't publish pre-built wheels.
# https://github.com/stanford-futuredata/megablocks/commit/5897cd6f254b7b3edf7a708a3a3314ecb54b6f78
# `apt-get update` runs in the same layer as the install so the package index
# is never a stale one cached from an earlier layer.
# NOTE(review): MAX_JOBS and NVCC_THREADS are hard-coded to 8 for this step,
# overriding the max_jobs / nvcc_threads build args — confirm this is intended.
RUN apt-get update -y \
    && apt-get install -y git \
    && git clone https://github.com/stanford-futuredata/megablocks.git \
    && cd megablocks \
    && git checkout 5897cd6f254b7b3edf7a708a3a3314ecb54b6f78 \
    && MAX_JOBS=8 NVCC_THREADS=8 python3 setup.py bdist_wheel

Stephen Krider's avatar
Stephen Krider committed
52
53
54
55
56
57
58
59
60
61
62
63
# image to run unit testing suite; derives from dev, so the development
# dependencies and the /workspace working directory are inherited
FROM dev AS test

# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY tests tests
COPY vllm vllm

# run pytest on the tests directory when the container starts
ENTRYPOINT ["python3", "-m", "pytest", "tests"]

# use CUDA base as CUDA runtime dependencies are already installed via pip
# (the `base` flavor is used here instead of the `devel` flavor used for building)
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS vllm-base
Stephen Krider's avatar
Stephen Krider committed
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

# install pip, which is needed to install the Python runtime dependencies;
# the apt package index is removed in the same layer so it does not end up
# in the runtime images built on top of this stage
# NOTE(review): the previous comment here said "libnccl required for ray", but
# this step installs no libnccl package — confirm it is provided elsewhere.
RUN apt-get update -y \
    && apt-get install -y python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace

# install runtime dependencies
# (pip's download cache is kept in a BuildKit cache mount, not in the image)
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

# runtime image that starts the vllm.entrypoints.api_server module
FROM vllm-base AS vllm
# compiled extensions come from the build stage, the Python sources from the
# build context
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm

# EXPOSE only documents the port; it does not publish it
# NOTE(review): presumably 8000 is the api_server's default port — confirm
EXPOSE 8000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]

# openai api server alternative
FROM vllm-base AS vllm-openai
# install additional dependencies for openai api server
# (pip's download cache is kept in a BuildKit cache mount, not in the image)
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate
Stephen Krider's avatar
Stephen Krider committed
87
88

# copy the vllm Python package from the build context into the working directory
COPY vllm vllm
89
90
91
92
93
# compiled extensions come from the build stage
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
# install the megablocks wheel straight from the build stage through a bind
# mount: the wheel never lands in an image layer (a later `rm` would not
# remove it from the layer that copied it), and the glob avoids hard-coding
# the exact version / Python tag in the wheel filename
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,from=build,source=/workspace/megablocks/dist,target=/tmp/megablocks-dist \
    pip install /tmp/megablocks-dist/megablocks-*.whl
Stephen Krider's avatar
Stephen Krider committed
94
95
96

# start the openai api server module when the container runs; exec form is
# used, so arguments given to `docker run` are appended to this command
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]